Пример #1
0
 def create():
     """Create a Chrome driver, record it in ``drivers`` and return it.

     The headless setting is taken from the enclosing ``driver_is_headless``
     value.
     """
     chrome_options = ChromeOptions()
     chrome_options.headless = driver_is_headless
     new_driver = create_simple_selenium_web_driver(
         driver_options=chrome_options)
     drivers.append(new_driver)
     return new_driver
Пример #2
0
def get_web_driver(email,
                   password,
                   headless=False,
                   mfa_method=None,
                   mfa_input_callback=None,
                   wait_for_sync=True):
    """Log in to Mint with Chrome and return the logged-in driver.

    If no ``chromedriver`` binary exists in the current working directory,
    the archive for this platform is downloaded and extracted there first.

    :param email: value typed into the ``ius-userid`` field.
    :param password: value typed into the ``ius-password`` field.
    :param headless: when true, Chrome is started with headless flags.
    :param mfa_method: ``"app"`` to submit a soft-token code, otherwise the
        suffix of the ``ius-mfa-option-<method>`` element to select. Defaults
        to ``"sms"`` (with a warning) when ``headless`` is set.
    :param mfa_input_callback: callable supplying the MFA code. For
        ``"app"`` it is called with no arguments; for other methods it is
        called with a prompt string and falls back to ``input``.
    :param wait_for_sync: when true, wait up to five minutes for the
        "Account refresh complete" status message.
    :return: the Chrome driver, positioned on the Mint overview page.
    :raises RuntimeError: if the chromedriver download does not return 200.
    """
    if headless and mfa_method is None:
        # The trailing space matters: the two literals are concatenated.
        warnings.warn(
            "Using headless mode without specifying an MFA method "
            "is unlikely to lead to a successful login. Defaulting --mfa-method=sms"
        )
        mfa_method = "sms"

    executable_path = os.getcwd() + os.path.sep + 'chromedriver'
    if _platform in ['win32', 'win64']:
        executable_path += '.exe'

    zip_type = CHROME_ZIP_TYPES.get(_platform)

    if not os.path.exists(executable_path):
        zip_file_url = CHROME_DRIVER_BASE_URL % (CHROME_DRIVER_VERSION,
                                                 zip_type)
        response = requests.get(zip_file_url)

        if response.status_code != 200:
            raise RuntimeError(
                'Error finding chromedriver at %r, status = %d' %
                (zip_file_url, response.status_code))

        # extractall() with no path unpacks into the current directory,
        # which is where executable_path points.
        zip_file = zipfile.ZipFile(io.BytesIO(response.content))
        zip_file.extractall()
        os.chmod(executable_path, 0o755)

    chrome_options = ChromeOptions()
    if headless:
        chrome_options.add_argument('headless')
        chrome_options.add_argument('no-sandbox')
        chrome_options.add_argument('disable-dev-shm-usage')
        chrome_options.add_argument('disable-gpu')
        # chrome_options.add_argument("--window-size=1920x1080")

    driver = Chrome(chrome_options=chrome_options,
                    executable_path="%s" % executable_path)
    driver.get(
        "https://accounts.intuit.com/index.html?redirect_url=https%3A%2F%2Fmint.intuit.com%2Foverview.event"
    )
    driver.implicitly_wait(20)  # seconds

    driver.find_element_by_id("ius-userid").send_keys(email)
    driver.find_element_by_id("ius-password").send_keys(password)
    driver.find_element_by_id("ius-sign-in-submit-btn").submit()

    # Wait until logged in, just in case we need to deal with MFA.
    while not driver.current_url.startswith(
            'https://mint.intuit.com/overview.event'):
        time.sleep(1)

        # Short lookup timeout so a missing MFA form does not stall the loop.
        driver.implicitly_wait(1)  # seconds
        try:
            if mfa_method == "app":
                # mfa_input_callback should be a function that returns the 2fa barcode.
                two_factor_code = mfa_input_callback()
                driver.find_element_by_id("ius-mfa-soft-token").send_keys(
                    two_factor_code)
                driver.find_element_by_id(
                    "ius-mfa-soft-token-submit-btn").submit()
            else:
                # Raises NoSuchElementException when the MFA form is absent.
                driver.find_element_by_id('ius-mfa-options-form')

                mfa_method_option = driver.find_element_by_id(
                    'ius-mfa-option-{}'.format(mfa_method))
                mfa_method_option.click()
                mfa_method_submit = driver.find_element_by_id(
                    "ius-mfa-options-submit-btn")
                mfa_method_submit.click()

                mfa_code = (mfa_input_callback or
                            input)("Please enter your 6-digit MFA code: ")
                mfa_code_input = driver.find_element_by_id(
                    "ius-mfa-confirm-code")
                mfa_code_input.send_keys(mfa_code)

                mfa_code_submit = driver.find_element_by_id(
                    "ius-mfa-otp-submit-btn")
                mfa_code_submit.click()
        except NoSuchElementException:
            # MFA page not shown (yet); poll again.
            pass
        except Exception as e:
            # Any other failure: stop automating MFA and let the user finish.
            mfa_method = None
            warnings.warn(
                "Giving up on handling MFA. Please complete "
                "the MFA process manually in the browser. Exception: " +
                repr(e))
        finally:
            driver.implicitly_wait(20)  # seconds

    # Wait until the overview page has actually loaded, and if wait_for_sync==True, sync has completed.
    if wait_for_sync:
        try:
            driver.implicitly_wait(5)
            status_message = driver.find_element_by_css_selector(
                ".SummaryView .message")
            WebDriverWait(driver, 5 *
                          60).until(lambda x: "Account refresh complete" in
                                    status_message.get_attribute('innerHTML'))
        except TimeoutException:
            warnings.warn(
                "Mint sync apparently incomplete after 5 minutes. Data "
                "retrieved may not be current.")
    else:
        driver.find_element_by_id("transaction")

    return driver
Пример #3
0
# Buy a concert ticket from 大麦 (damai.cn) at a specific time.
# You need to configure the Chrome profile/cookie location (option.add_argument), target_time, damai_url...
from selenium import webdriver
import time
import schedule
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Module-level setup: build the Chrome options, start the browser and define
# the CSS selectors for the purchase flow.
option = ChromeOptions()

option.add_argument(
    '--user-data-dir=C:/Users/chris/AppData/Local/Google/Chrome/User Data'
)  # point this at your own Chrome profile directory (where the cookies live)
# paste your concert url below
damai_url = 'https://detail.damai.cn/item.htm?spm=a2oeg.search_category.0.0.436a5389MtFkee&id=598066779141&clicktitle=%E6%B1%AA%E8%8B%8F%E6%B3%B72019%E2%80%9C%E9%93%B6%E6%B2%B3%E6%BC%AB%E6%B8%B8%E2%80%9D%E5%B7%A1%E5%9B%9E%E6%BC%94%E5%94%B1%E4%BC%9A%E4%B8%8A%E6%B5%B7%E7%AB%99-%E7%94%9F%E6%97%A5%E7%89%B9%E5%88%AB%E5%9C%BA'
# NOTE(review): the value carries a trailing space; confirm the consumer expects it.
target_time = "23:18:59 "  #"23:18:59"

option.add_experimental_option('excludeSwitches', ['enable-automation'])
# NOTE(review): DesiredCapabilities.CHROME is mutated in place (no .copy()),
# and desired_capabilities is not passed to webdriver.Chrome below, so the
# pageLoadStrategy setting may never reach the driver. Confirm.
desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none"  # alternative: "eager"
prefs = {
    "profile.managed_default_content_settings.images": 2
}  # 1: load images, 2: block image loading
option.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome(options=option)

# CSS selectors used by the purchase flow; the trailing comments keep
# alternative XPath/CSS locators for the same elements.
book = 'div[class=buybtn]'  #/html/body/div[2]/div/div[1]/div[1]/div/div[2]/div[3]/div[10]/div
people = 'span.next-checkbox'  #span.next-checkbox  span.next-checkbox > input:nth-child(2)
submit = '.submit-wrapper > button:nth-child(1)'  # XPath alternative matching the button text "agree to the above terms and submit the order": //button[@type="button"][contains(.,"同意以上协议并提交订单")]
Пример #4
0
            td = div[1].find_element_by_css_selector('td')
            f.write(td.text)
            f.write('\n\n') 

        except IndexError:
            td = div[0].find_elements_by_css_selector('td')
            f.write(td[0].text)
            f.write('\n\n')
                
            return get_table(j)
    
    f.close()
    return 0
    

# NOTE(review): `options` is configured as headless but is not passed to
# webdriver.Chrome() below, so the visible code starts a regular browser.
options = ChromeOptions()
options.headless = True


# `num` is rebound by the for loop below; this initial value is never read
# in the visible code.
num = 1
browser = webdriver.Chrome()
url = []


# Visit the 15 result pages, pausing one second before each request.
for num in range(15):
    time.sleep(1)
    browser.get(f'https://www.green-japan.com/search_key/01?key=jr8w4uiffy2sdol4dg1b&keyword=&page={num + 1}')
    
    # Extra handling for the last page (index 14): it is processed in 8 passes.
    if num == 14:
            for j in range(8):
                div = browser.find_elements_by_css_selector('.card-info__wrapper')
Пример #5
0
def get_web_driver(email,
                   password,
                   headless=False,
                   mfa_method=None,
                   mfa_input_callback=None,
                   wait_for_sync=True,
                   wait_for_sync_timeout=5 * 60,
                   session_path=None,
                   imap_account=None,
                   imap_password=None,
                   imap_server=None,
                   imap_folder="INBOX"):
    """Log in to Mint with Chrome and return the logged-in driver.

    If no ``chromedriver`` binary exists in the current working directory,
    the archive for this platform is downloaded and extracted there first.

    :param email: value typed into the ``ius-userid`` field (the field is
        cleared first).
    :param password: value typed into the ``ius-password`` field.
    :param headless: when true, Chrome is started with headless flags.
    :param mfa_method: suffix of the ``ius-mfa-option-<method>`` element to
        select. Defaults to ``"sms"`` (with a warning) when ``headless`` is
        set.
    :param mfa_input_callback: callable taking a prompt string and returning
        the MFA code; falls back to ``input``.
    :param wait_for_sync: when true, wait for the "Account refresh complete"
        status message.
    :param wait_for_sync_timeout: seconds to wait for that message.
    :param session_path: if given, passed to Chrome as ``user-data-dir``.
    :param imap_account: with ``mfa_method == 'email'``, fetch the code via
        ``get_email_code`` instead of prompting.
    :param imap_password: forwarded to ``get_email_code``.
    :param imap_server: forwarded to ``get_email_code``.
    :param imap_folder: forwarded to ``get_email_code``.
    :return: the Chrome driver, positioned on the Mint overview page.
    :raises RuntimeError: if the chromedriver download does not return 200.
    """
    if headless and mfa_method is None:
        # The trailing space matters: the two literals are concatenated.
        warnings.warn(
            "Using headless mode without specifying an MFA method "
            "is unlikely to lead to a successful login. Defaulting --mfa-method=sms"
        )
        mfa_method = "sms"

    executable_path = os.getcwd() + os.path.sep + 'chromedriver'
    if _platform in ['win32', 'win64']:
        executable_path += '.exe'

    zip_type = CHROME_ZIP_TYPES.get(_platform)

    if not os.path.exists(executable_path):
        zip_file_url = CHROME_DRIVER_BASE_URL % (CHROME_DRIVER_VERSION,
                                                 zip_type)
        response = requests.get(zip_file_url)

        if response.status_code != 200:
            raise RuntimeError(
                'Error finding chromedriver at %r, status = %d' %
                (zip_file_url, response.status_code))

        # extractall() with no path unpacks into the current directory,
        # which is where executable_path points.
        zip_file = zipfile.ZipFile(io.BytesIO(response.content))
        zip_file.extractall()
        os.chmod(executable_path, 0o755)

    chrome_options = ChromeOptions()
    if headless:
        chrome_options.add_argument('headless')
        chrome_options.add_argument('no-sandbox')
        chrome_options.add_argument('disable-dev-shm-usage')
        chrome_options.add_argument('disable-gpu')
        # chrome_options.add_argument("--window-size=1920x1080")
    if session_path is not None:
        chrome_options.add_argument("user-data-dir=%s" % session_path)

    driver = Chrome(chrome_options=chrome_options,
                    executable_path="%s" % executable_path)
    driver.get("https://www.mint.com")
    driver.implicitly_wait(20)  # seconds
    try:
        element = driver.find_element_by_link_text("Sign in")
    except NoSuchElementException:
        # when user has cookies, a slightly different front page appears
        # NOTE(review): the retry looks up the same link text; confirm that
        # this is the intended locator for the alternate page.
        driver.implicitly_wait(0)  # seconds
        element = driver.find_element_by_link_text("Sign in")
        driver.implicitly_wait(20)  # seconds
    element.click()
    time.sleep(1)
    email_input = driver.find_element_by_id("ius-userid")
    # It's possible that the user clicked "remember me" at some point, causing
    # the email to already be present. If anything is in the input, clear it
    # and use the provided email, just to be safe.
    # email_input.setAttribute("value", "")
    email_input.clear()
    email_input.send_keys(email)
    driver.find_element_by_id("ius-password").send_keys(password)
    driver.find_element_by_id("ius-sign-in-submit-btn").submit()

    # Wait until logged in, just in case we need to deal with MFA.
    while not driver.current_url.startswith(
            'https://mint.intuit.com/overview.event'):
        # An implicitly_wait is also necessary here to avoid getting stuck on
        # find_element_by_id while the page is still in transition.
        driver.implicitly_wait(1)
        time.sleep(1)

        # bypass "Let's add your current mobile number" interstitial page
        try:
            skip_for_now = driver.find_element_by_id(
                'ius-verified-user-update-btn-skip')
            skip_for_now.click()
        except (NoSuchElementException, StaleElementReferenceException,
                ElementNotVisibleException):
            pass

        driver.implicitly_wait(1)  # seconds
        try:
            # Raises NoSuchElementException when the MFA form is absent,
            # which skips the whole MFA step for this iteration.
            driver.find_element_by_id('ius-mfa-options-form')
            try:
                mfa_method_option = driver.find_element_by_id(
                    'ius-mfa-option-{}'.format(mfa_method))
                mfa_method_option.click()
                mfa_method_submit = driver.find_element_by_id(
                    "ius-mfa-options-submit-btn")
                mfa_method_submit.click()

                if mfa_method == 'email' and imap_account:
                    mfa_code = get_email_code(imap_account,
                                              imap_password,
                                              imap_server,
                                              imap_folder=imap_folder)
                else:
                    mfa_code = (mfa_input_callback or
                                input)("Please enter your 6-digit MFA code: ")
                mfa_code_input = driver.find_element_by_id(
                    "ius-mfa-confirm-code")
                mfa_code_input.send_keys(mfa_code)

                mfa_code_submit = driver.find_element_by_id(
                    "ius-mfa-otp-submit-btn")
                mfa_code_submit.click()
            except Exception:  # if anything goes wrong for any reason, give up on MFA
                mfa_method = None
                warnings.warn("Giving up on handling MFA. Please complete "
                              "the MFA process manually in the browser.")
        except NoSuchElementException:
            pass
        finally:
            driver.implicitly_wait(20)  # seconds

    # Wait until the overview page has actually loaded, and if wait_for_sync==True, sync has completed.
    if wait_for_sync:
        try:
            # Status message might not be present straight away. Seems to be due
            # to dynamic content (client side rendering).
            status_message = WebDriverWait(driver, 30).until(
                expected_conditions.visibility_of_element_located(
                    (By.CSS_SELECTOR, ".SummaryView .message")))
            WebDriverWait(driver, wait_for_sync_timeout).until(
                lambda x: "Account refresh complete" in status_message.
                get_attribute('innerHTML'))
        except (TimeoutException, StaleElementReferenceException):
            warnings.warn("Mint sync apparently incomplete after timeout. "
                          "Data retrieved may not be current.")
    else:
        driver.find_element_by_id("transaction")

    return driver
Пример #6
0
def _hide_webdriver_flag(driver):
    """Make ``navigator.webdriver`` read as undefined on every new document."""
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument", {
            "source":
            """
            Object.defineProperty(navigator, 'webdriver', {
            get: () => undefined
            })"""
        })
    return driver


def browser(name=None, driver_path=None, grid_url=None):
    """
    Create and return a WebDriver for the requested browser.

    When ``name`` is omitted, Chrome is started. Supported names are
    "firefox"/"ff", "chrome"/"google chrome"/"gc", "internet explorer"/"ie"/
    "IE", "opera", "chrome_headless", "firefox_headless", "edge", "safari",
    and any device name listed in ``PHONE_LIST`` or ``PAD_LIST`` (Chrome
    mobile emulation).

    :param name: Browser name
    :param driver_path: Browser driver path; a per-browser default is used
             when omitted.
    :param grid_url: Either a string representing URL of the remote server or a custom
             remote_connection.RemoteConnection object. Only honoured for
             the plain firefox and chrome names.
    :return: the started WebDriver instance
    :raises NameError: if ``name`` is not a supported browser.
    """
    CHROMEDRIVER = "chromedriver"
    FIREFOXDRIVER = "geckodriver"
    IEDRIVER = "IEDriverServer.exe"
    OPERADRIVER = "operadriver"
    EDGEDRIVER = "MicrosoftWebDriver.exe"
    SAFARIDRIVER = "/usr/bin/safaridriver"

    # Prevention of detection
    option = ChromeOptions()
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    if name is None:
        name = "chrome"

    if name in ["firefox", "ff"]:
        if driver_path is None:
            driver_path = FIREFOXDRIVER
        if grid_url is not None:
            return webdriver.Remote(
                command_executor=grid_url,
                desired_capabilities=DesiredCapabilities.FIREFOX.copy())
        return webdriver.Firefox(executable_path=driver_path)

    elif name in ["chrome", "google chrome", "gc"]:
        if driver_path is None:
            driver_path = CHROMEDRIVER
        if grid_url is not None:
            return webdriver.Remote(
                command_executor=grid_url,
                desired_capabilities=DesiredCapabilities.CHROME.copy())
        driver = webdriver.Chrome(options=option, executable_path=driver_path)
        return _hide_webdriver_flag(driver)

    elif name in ["internet explorer", "ie", "IE"]:
        if driver_path is None:
            driver_path = IEDRIVER
        return webdriver.Ie(executable_path=driver_path)

    elif name == "opera":
        if driver_path is None:
            driver_path = OPERADRIVER
        return webdriver.Opera(executable_path=driver_path)

    elif name == "chrome_headless":
        if driver_path is None:
            driver_path = CHROMEDRIVER
        # Extend the single options object: passing both ``chrome_options``
        # and ``options`` makes Selenium drop ``options``, which silently
        # lost the anti-detection setting.
        option.add_argument('--headless')
        driver = webdriver.Chrome(options=option, executable_path=driver_path)
        return _hide_webdriver_flag(driver)

    elif name == "firefox_headless":
        firefox_options = FF_Options()
        firefox_options.headless = True
        if driver_path is None:
            driver_path = FIREFOXDRIVER
        return webdriver.Firefox(options=firefox_options,
                                 executable_path=driver_path)

    elif name == 'edge':
        if driver_path is None:
            driver_path = EDGEDRIVER
        return webdriver.Edge(executable_path=driver_path)

    elif name == 'safari':
        if driver_path is None:
            driver_path = SAFARIDRIVER
        return webdriver.Safari(executable_path=driver_path)

    elif name in PHONE_LIST or name in PAD_LIST:
        if driver_path is None:
            driver_path = CHROMEDRIVER
        # Phones take precedence when a device name is in both lists.
        window_width = 480 if name in PHONE_LIST else 1100
        option.add_experimental_option("mobileEmulation",
                                       {"deviceName": name})
        driver = webdriver.Chrome(options=option, executable_path=driver_path)
        driver.set_window_size(width=window_width, height=900)
        return _hide_webdriver_flag(driver)

    else:
        raise NameError(
            "Not found '{}' browser, See the help doc: https://github.com/SeldomQA/seldom/blob/master/docs/driver.md'."
            .format(name))
Пример #7
0
def grab_img(user):
    """Download the images shown on an Instagram profile page.

    Opens ``https://www.instagram.com/<user>/`` in headless Chrome, clicks
    "Load more", scrolls to the bottom 100 times, then downloads every
    element whose ``src`` contains ``https://scontent`` into a ``<user>``
    directory next to this file. Files that already exist are skipped.

    :param user: Instagram user name; also used as the target directory
        name and as the file-name prefix.
    :raises Exception: whatever Selenium raises if the "Load more" link is
        missing; the browser is shut down in every case.
    """
    grab1 = GrabIt()
    options = ChromeOptions()
    options.add_argument('headless')
    options.add_argument('disable-gpu')
    driver = Chrome(options=options)
    try:
        url = 'https://www.instagram.com/' + user + '/'
        driver.get(url)
        # implicitly_wait only sets the element-lookup timeout; it does not
        # pause execution by itself.
        driver.implicitly_wait(5)
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")
        try:
            # Optional link: ignore it when it is absent or cannot be clicked.
            driver.find_element_by_xpath(
                '//*[@id="react-root"]/section/nav/div[2]/div/div/div[3]/div/section/div/a'
            ).click()
            driver.implicitly_wait(2)
        except Exception:
            pass
        driver.find_element_by_xpath(
            "//a[text()[contains(.,'Load more')]]").click()
        driver.implicitly_wait(5)
        for _ in range(100):
            driver.implicitly_wait(3)
            driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

        driver.implicitly_wait(5)
        elements = driver.find_elements_by_xpath('//*[@src]')

        # Create the directory where the files are actually written (next to
        # this module), not relative to the current working directory.
        target_dir = os.path.join(os.path.dirname(__file__), user)

        for element in elements:
            src = element.get_attribute('src')
            if not src or 'https://scontent' not in src:
                continue

            # Build the download URL by stripping size/crop/vp path segments.
            download_url = re.sub(r's\w\w\wx\w\w\w\/',
                                  '',
                                  src,
                                  flags=re.IGNORECASE)
            download_url = re.sub(r'\w{3}\.\w{2}\/',
                                  '',
                                  download_url,
                                  flags=re.IGNORECASE)
            download_url = re.sub(r'\w{0,4}\.\d{0,4}\.\d{0,4}\.\d{0,5}\/',
                                  '',
                                  download_url,
                                  flags=re.IGNORECASE)
            download_url = re.sub(r'\/vp\/\w+\/\w+',
                                  '',
                                  download_url,
                                  flags=re.IGNORECASE)

            # Derive the local file name from the original src.
            file_name = re.sub(r'https:\/\/\w{8}\S+\w{4}-\w(.*)\/',
                               '',
                               src,
                               flags=re.IGNORECASE)
            file_name = re.sub(r'\?ig_cache_key=\w+(\S+)',
                               '',
                               file_name,
                               flags=re.IGNORECASE)

            plain_path = os.path.join(target_dir, file_name)
            prefixed_path = os.path.join(target_dir, user + '_' + file_name)
            if not os.path.isdir(target_dir):
                os.makedirs(target_dir)
            if os.path.isfile(plain_path) or os.path.isfile(prefixed_path):
                print('file exists - skipping')
            else:
                try:
                    grab1.download_file(download_url, prefixed_path)
                    print(file_name)
                except Exception as e:
                    # Best effort: report and move on to the next image.
                    print(str(e))
    finally:
        driver.quit()
Пример #8
0

def cjy():
    """Send ``code.png`` to the Chaojiying service and return its response.

    The image is read from the current working directory and posted with
    code type 1902.
    """
    # NOTE(review): account name, password and software ID are hard-coded;
    # consider loading them from configuration instead of source control.
    chaojiying = Chaojiying_Client('cjyyhmbhf', 'pin86648535',
                                   '902284')  # User center >> Software ID: generate your own and replace this one
    # Local image path; on Windows the path separators may need doubling.
    with open('code.png', 'rb') as image_file:
        im = image_file.read()
    return chaojiying.PostPic(im, 1902)


# from selenium.webdriver.chrome.options import Options
#
#
# op = Options()
# op.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
# Start Chrome with the 'enable-automation' switch excluded and open the site.
op = ChromeOptions()
op.add_experimental_option("excludeSwitches", ["enable-automation"])
driver = webdriver.Chrome(executable_path='../chromedriver.exe', options=op)
driver.maximize_window()
driver.get('https://toefl.neea.cn/')
# Action chain bound to the driver; not used in the lines visible here.
swA = ActionChains(driver)
cookies = driver.get_cookies()
time.sleep(1)
# Locate the login link by absolute XPath; it is clicked after the cookies
# have been re-added.
login_a = driver.find_element_by_xpath('/html/body/div[3]/div/div/div[2]/a[1]')
time.sleep(1)
# Re-add every cookie with its "expiry" key removed, printing each one.
for cookie in cookies:
    if "expiry" in cookie:
        cookie.pop("expiry")
    print(cookie)
    driver.add_cookie(cookie)
login_a.click()
Пример #9
0
def driver():
    """Yield a Chrome session for the "Examples" project, then quit it."""
    chrome_session = webdriver.Chrome(
        chrome_options=ChromeOptions(),
        project_name="Examples",
        job_name=None,
    )
    yield chrome_session
    # Runs when the generator is resumed after the consumer is done.
    chrome_session.quit()
Пример #10
0
 def __init__(self):
     """Start Chrome with the "detach" option, call update_info, load ''."""
     chrome_opts = ChromeOptions()
     chrome_opts.add_experimental_option("detach", True)
     browser = webdriver.Chrome(chrome_options=chrome_opts)
     self.browser = browser
     self.update_info()
     # NOTE(review): the URL is an empty string; confirm the intended page.
     self.browser.get('')
Пример #11
0
    def scrape(self):
        """Scrape the leaderboard at ``self.url`` and return per-player scores.

        Loads the page in headless Chrome, waits up to 60 seconds for the
        ``leaderBoardPlayersTraditionalContent`` element, then parses every
        ``playerRow`` div. Each row is matched to a ``Field`` entry of
        ``self.tournament``, first through the ``pr<number>`` CSS class and
        ``Golfer.golfer_pga_num``, then by name. Rows that cannot be matched
        or parsed are reported and skipped.

        Side effect: stores ``'Cut Number <n>'`` in
        ``self.tournament.cut_score`` and saves the tournament.

        :return: dict mapping player name to a dict with the keys ``rank``,
            ``change``, ``thru``, ``round_score``, ``total_score`` and
            ``r1``..``r4``; ``None`` if waiting or parsing fails (the error
            is printed).
        """
        start = datetime.now()

        def stat_text(row, attrs):
            """Return the 'data' text of the stat cell matching ``attrs``."""
            container = row.find('div', {'class': 'playerStatContainer'})
            return container.find('div', attrs).find('div', {
                'class': 'data'
            }).text

        options = ChromeOptions()
        user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'

        options.add_argument("--headless")
        options.add_argument("--disable-gpu")
        options.add_argument(f'user-agent={user_agent}')

        driver = Chrome(options=options)
        try:
            print('driver pre url: ', datetime.now() - start)
            driver.get(self.url)
            print('driver after url: ', datetime.now() - start)

            try:
                WebDriverWait(driver, 60).until(
                    EC.presence_of_element_located(
                        (By.ID, "leaderBoardPlayersTraditionalContent")))
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                player_rows = soup.find_all('div', {'class': 'playerRow'})

                score_dict = {}

                for row in player_rows:
                    try:
                        masters_name = row.find('div', {
                            'class': 'playerName'
                        }).find('div', {
                            'class': 'data'
                        }).text

                        # Reset per row so a row without a pr<number> class
                        # cannot reuse the previous row's id.
                        player_num = None
                        for c in row['class']:
                            if c[:2] == 'pr':
                                player_num = c[2:]

                        try:
                            if player_num is None:
                                raise LookupError('row has no pr<number> class')
                            golfer = Golfer.objects.get(
                                golfer_pga_num=player_num)
                            field = Field.objects.get(
                                tournament=self.tournament, golfer=golfer)
                            player_name = field.playerName
                        except Exception:
                            # Fall back to matching on the first word of the
                            # displayed name.
                            name_token = masters_name.split(',')[0].split(
                                ' ')[0].capitalize()
                            name_matches = Field.objects.filter(
                                tournament=self.tournament,
                                playerName__contains=name_token)
                            if not name_matches.exists():
                                print('cant find player', masters_name)
                                continue
                            player_name = name_matches.get().playerName

                        pos = row.find('div', {
                            'class': 'pos'
                        }).find('div', {
                            'class': 'data'
                        }).text

                        if pos != "WD":
                            total = stat_text(row, {'attr': 'topar'})
                            today = stat_text(row, {'attr': 'today3'})
                            thru = stat_text(row, {'class': 'thru'})
                        else:
                            # Withdrawn players only carry round scores.
                            total = ''
                            today = ''
                            thru = ''

                        # Empty round cells are reported as '--'.
                        rounds = {
                            key: stat_text(row, {'class': key}) or '--'
                            for key in ('r1', 'r2', 'r3', 'r4')
                        }

                        score_dict[player_name] = {
                            'rank': pos,
                            'change': 'n/a',
                            'thru': thru,
                            'round_score': today,
                            'total_score': total,
                            **rounds,
                        }

                    except Exception as e:
                        print('row execptino', e)

                # NOTE(review): the original computed cut_num, then evaluated
                # `[...] + 1` (a TypeError) and referenced an undefined `cut`;
                # cut_num is assumed to be the intended value. Confirm.
                not_playing = self.tournament.not_playing_list()
                cut_num = len([
                    x for x in score_dict.values()
                    if int(utils.formatRank(x['rank'])) <= 50
                    and x['rank'] not in not_playing
                ]) + 1
                self.tournament.cut_score = 'Cut Number ' + str(cut_num)
                self.tournament.save()

                return score_dict

            except Exception as e:
                print('scrape issues', e)
        finally:
            # Always shut the browser down, whatever happened above.
            driver.quit()
Пример #12
0
 def setUp(self):
     """Launch a maximized, detached Chrome and open the staging sign-in page."""
     chrome_opts = ChromeOptions()
     chrome_opts.add_experimental_option("detach", True)
     chrome_opts.add_argument("--start-maximized")
     chrome = webdriver.Chrome(options=chrome_opts)
     self.driver = chrome
     self.driver.get("http://kirv-ui-staging.herokuapp.com/signin")
    def get_jobs(start, end, webdriver_location, location='Pune', query=''):
        '''
        Scrape job postings from the Indeed India search pages.

        start: first value sent as the ``start`` URL parameter.
        end: exclusive upper bound of the ``start`` values requested.
        webdriver_location: path of the chromedriver executable.
        location: place, city or country sent as the ``l`` URL parameter.
        query: search terms sent as the ``q`` URL parameter.

        After every requested page, everything collected so far is written
        to a new ``.xlsx`` file inside ``download_directory``.  Errors while
        scraping a page or writing the file are printed, not raised.

        Returns a DataFrame with the columns Title, Location, Company,
        Salary, Sponsored, Description and Time, one row per posting.
        '''

        warnings.filterwarnings('ignore')

        columns = [
            'Title', 'Location', 'Company', 'Salary', 'Sponsored',
            'Description', 'Time'
        ]

        # Relative "posted" labels that are understood, mapped to an age in
        # days.  Any other label (or a missing one) gets a random age below.
        posted_days = {'Just posted': 0, 'Today': 0, '1 day ago': 1}
        posted_days.update({'%d days ago' % n: n for n in range(2, 31)})

        def clean_text(tag):
            # Text of a tag without newlines / surrounding blanks; '' when
            # the tag was not found.
            if tag is None:
                return ''
            return tag.text.replace('\n', '').strip()

        opts = ChromeOptions()
        opts.headless = True
        driver = Chrome(webdriver_location, options=opts)

        # Rows are gathered in a plain list: DataFrame.append was removed in
        # pandas 2.0 and re-copies the whole frame on every call.
        rows = []

        try:
            for i in range(start, end):
                try:
                    driver.get('https://www.indeed.co.in/jobs?q=' + query +
                               '&l=' + location + '&start=' + str(i))

                    for job in driver.find_elements_by_class_name('result'):
                        soup = BeautifulSoup(job.get_attribute('innerHTML'),
                                             'html.parser')

                        title = clean_text(soup.find('a', class_='jobtitle'))

                        loc_tag = soup.find(class_='location')
                        loc = loc_tag.text if loc_tag is not None else ''

                        company = clean_text(soup.find(class_='company'))
                        salary = clean_text(soup.find(class_='salary'))

                        if soup.find(class_='sponsoredGray') is not None:
                            sponsored = 'Sponsored'
                        else:
                            sponsored = 'Organic'

                        sum_div = job.find_element_by_xpath('./div[3]')

                        try:
                            sum_div.click()
                        except Exception:
                            # Presumably a popover intercepted the click:
                            # close it and retry once.
                            close_button = driver.find_elements_by_class_name(
                                'popover-x-button-close')[0]
                            close_button.click()
                            sum_div.click()

                        date_tag = soup.find(class_='date')
                        age = None
                        if date_tag is not None:
                            age = posted_days.get(date_tag.text)
                        if age is None:
                            # Unknown or missing label: fall back to a random
                            # age between 31 and 181 days.
                            age = randint(31, 181)
                        posted_on = str(date.today() - timedelta(days=age))

                        driver.implicitly_wait(10)

                        try:
                            job_desc = driver.find_element_by_id(
                                'vjs-desc').text
                        except Exception:
                            job_desc = None

                        rows.append({
                            'Title': title,
                            'Location': loc,
                            'Company': company,
                            'Salary': salary,
                            'Sponsored': sponsored,
                            'Description': job_desc,
                            'Time': posted_on,
                        })
                except Exception as e:
                    print(e)

                finally:
                    try:
                        if not os.path.isdir(download_directory):
                            os.mkdir(download_directory)

                        # The file name is the md5 hex digest of the current
                        # timestamp.
                        n = download_directory + hashlib.md5(
                            str(datetime.now()).encode()).hexdigest() + '.xlsx'

                        pd.DataFrame(rows, columns=columns).to_excel(
                            n, index=False)
                    except Exception as e:
                        print(e)
        finally:
            # quit() ends the whole session, including the chromedriver
            # process, even if the loop is interrupted.
            driver.quit()

        return pd.DataFrame(rows, columns=columns)
"""
    Using this file as a playground to test new functionality
    without running the whole bot.
"""

from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException

from os import path, makedirs
from time import sleep

# Playground setup: start Chrome, load a page and dump the browser console log.
opt = ChromeOptions()
# NOTE(review): W3C mode is switched off, presumably because get_log() below
# needs the legacy protocol -- confirm against the chromedriver in use.
opt.add_experimental_option('w3c', False)
driver = Chrome(chrome_options=opt)
driver.get('https://www.apple.com')
sleep(2)
print("and:")
print(driver.get_log('browser'))

# Credentials are stored as "username,password".  The file is closed as soon
# as it has been read, and surrounding whitespace (e.g. a trailing newline)
# is stripped so it does not end up in the password.
with open('ig.credentials.txt') as credentials_file:
    username, password = (
        part.strip() for part in credentials_file.read().split(','))
user = {'username': username, 'password': password}


def open_instagram_and_login():
    """Open instagram.com in Chrome and log in with the stored credentials.

    NOTE(review): the body visible here only defines the login URL; the
    navigation and form-filling steps are not shown.
    """
    login_url = "https://www.instagram.com/accounts/login/?source=auth_switcher"
Пример #15
0
def get_driver(headless=False):
    """Return a Chrome driver, adding ``--headless`` when *headless* is truthy."""
    browser_options = ChromeOptions()
    extra_flags = ["--headless"] if headless else []
    for flag in extra_flags:
        browser_options.add_argument(flag)
    return Chrome(chrome_options=browser_options)
Пример #16
0
 def __init__(self):
     """Store today's date stamp, the chromedriver path, options and URL."""
     now = datetime.datetime.today()
     # Date stamp formatted as YYYYMMDD.
     self.Date = now.strftime('%Y%m%d')
     self.driver_path = 'C:/Users/Administrator/Downloads/chromedriver'
     self.Options = ChromeOptions()
     self.url = "https://www.7788365365.com/?&cb=105812118651#/IP/"
0
def set_driver(isHeadless=False, isManager=False, isSecret=False):
    """Create a configured Chrome WebDriver.

    Args:
        isHeadless: add ``--headless`` and ``--single-process``.
        isManager: obtain chromedriver via ``ChromeDriverManager().install()``
            instead of the binary in the current working directory.
        isSecret: start Chrome with ``--incognito``; otherwise (and only when
            not headless) use the ``profile`` user-data directory.

    Returns:
        The Chrome driver, or ``None`` when start-up fails with one of the
        handled Selenium exceptions (the error is logged first).
    """
    options = ChromeOptions()

    user_agent = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
    ]

    # Only Windows needs the ".exe" suffix.  Every other platform uses the
    # plain name, so driver_path is always bound (it used to stay undefined
    # when os.name was neither 'nt' nor 'posix').
    driver_path = 'chromedriver.exe' if os.name == 'nt' else 'chromedriver'

    if isHeadless:
        options.add_argument('--headless')
        options.add_argument('--single-process')

    if isSecret:
        options.add_argument('--incognito')  # enable incognito mode
    elif not isHeadless:
        options.add_argument('--user-data-dir=profile')

    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    options.add_argument('log-level=3')
    options.add_argument('--ignore-ssl-errors')
    # One of the user agents above is picked at random for each driver.
    options.add_argument(f'--user-agent={random.choice(user_agent)}')
    options.add_argument('--start-maximized')
    options.add_argument('--ignore-certificate-errors')
    options.add_argument('--allow-running-insecure-content')
    options.add_argument('--disable-web-security')
    options.add_argument('--disable-desktop-notifications')
    options.add_argument('--disable-application-cache')
    options.add_argument("--disable-extensions")
    options.add_argument('--lang=ja')

    if isManager:  # driver binary fetched automatically
        try:
            driver = Chrome(ChromeDriverManager().install(), options=options)
        except InvalidArgumentException as err:
            logger.error(err)
            logger.error('既存のブラウザを閉じで実行してください。')
            return None
    else:  # driver binary supplied manually in the working directory
        try:
            path = os.path.join(os.getcwd(), driver_path)
            driver = Chrome(executable_path=path, options=options)
        except InvalidArgumentException as err:
            logger.error(err)
            logger.error('既存のブラウザを閉じで実行してください。')
            return None
        except WebDriverException as err:
            logger.error(err)
            logger.error('Chromeと同じバージョンのChrome Driverをダウンロードしてください。')
            return None

    return driver
Пример #18
0
    def scrape_garmin(self):
        """Log in on the page at ``self.url`` and collect running activities.

        Returns:
            dict keyed by each activity's subtitle text, with a tuple
            ``(activity link text, distance, duration, calories)`` as value.
            Only activities whose link text contains 'Running' are kept.
            Any exception is printed and whatever was collected so far is
            returned; the browser is always shut down.
        """
        options = ChromeOptions()
        options.add_argument("--disable-gpu")
        options.add_argument('--ignore-certificate-errors')
        options.add_argument("--window-size=1920,1080")
        driver = Chrome(options=options)
        driver.get(self.url)

        run_dict = {}

        try:
            main_page = driver.current_window_handle

            login = driver.find_element_by_xpath(
                '/html/body/div/div/div/header/nav/ul/li[4]/a/button')
            login.click()

            # Pick the window that is NOT the main one.  The comparison used
            # to be against the literal 'main page', which is always unequal.
            # Defaulting to the main window keeps a same-window login working.
            login_page = main_page
            for handle in driver.window_handles:
                print(handle)
                if handle != main_page:
                    login_page = handle

            driver.switch_to.window(login_page)
            time.sleep(5)

            print('before email')
            try:
                email_id = driver.find_element_by_xpath(
                    '//*[@id="com.fitnesskeeper.runkeeper.pro:id/login-a_email"]'
                )
            except Exception as e:
                print('email exception', e)
                email_id = driver.find_element_by_id('input.username')

            print('after email')
            email_id.send_keys(os.environ.get('email_address'))

            # NOTE(review): '#password' looks like a CSS selector passed to
            # an id lookup -- verify against the actual login form.
            password = driver.find_element_by_id('#password')
            password.send_keys(os.environ.get('garmin_pwd'))
            time.sleep(5)
            sub_button = driver.find_element_by_id('login-btn-signin')
            sub_button.click()

            time.sleep(2)
            driver.switch_to.window(main_page)
            driver.maximize_window()

            first = driver.find_element_by_class_name('feedArrow')
            first.click()

            a_list = driver.find_element_by_id(
                'ui-accordion-activityHistoryMenu-panel-0')
            lines = a_list.find_elements_by_tag_name('li')

            for i, _ in enumerate(lines):
                # The first entry is read as-is; later ones are looked up
                # again and clicked before their details are read.
                if i >= 1:
                    a_list = driver.find_element_by_id(
                        'ui-accordion-activityHistoryMenu-panel-0')
                    a_list.find_elements_by_tag_name('li')[i].click()

                date = driver.find_element_by_class_name(
                    'micro-text.activitySubtitle')
                activity_list = driver.find_element_by_id(
                    'activityHistoryMenu')
                first_activity_div = activity_list.find_element_by_class_name(
                    'selected')
                first_activity = first_activity_div.find_element_by_tag_name(
                    'a')
                dist = driver.find_element_by_id('totalDistance')
                duration = driver.find_element_by_id('totalDuration')
                cals = driver.find_element_by_id('totalCalories')

                if 'Running' in first_activity.text:
                    # Each total's value is the second line of its text.
                    run_dict[date.text] = (
                        first_activity.text,
                        dist.text.split('\n')[1],
                        duration.text.split('\n')[1],
                        cals.text.split('\n')[1],
                    )

        except Exception as e:
            print('exception', e)

        finally:
            driver.quit()

        return run_dict
Пример #19
0
class Browser():
    """Selenium helper that lists a manga's chapters and downloads the pages."""

    # Class-level options object; the methods below build their own options.
    chrome_options = ChromeOptions()

    def __init__(self):
        """Nothing to set up (the method was previously misspelled ``__init_``)."""
        pass

    def get_info(self, link):
        """Collect title, chapter numbers and chapter links from *link*.

        Returns:
            dict with ``manga_title`` (text of ``.widget-title``),
            ``manga_chapter_names`` (digits and dots of each chapter label)
            and ``manga_chapter_links`` (chapter URLs).  Both lists are in
            reverse page order.
        """
        chrome_options = ChromeOptions()
        chrome_options.add_argument("--headless")

        chrome = Chrome(chrome_options=chrome_options)
        try:
            chrome.get(link)

            # get the links to the chapters
            links = chrome.find_elements_by_css_selector(
                "[class^=chapter-title] a")
            links.reverse()
            chapter_urls = [a.get_attribute("href") for a in links]

            # get the chapter numbers
            chapter_numbers = chrome.find_elements_by_css_selector(
                "[class^=chapter-title] em")
            chapter_numbers.reverse()
            sanitized_numbers = [
                ''.join(c for c in nr.text if c.isdigit() or c == ".")
                for nr in chapter_numbers
            ]

            # Read the text before the browser is shut down.
            manga_title = chrome.find_element_by_css_selector(
                ".widget-title").text

            if chapter_numbers:
                print(chapter_numbers[0].text)
        finally:
            # The browser used to be left running after this call.
            chrome.quit()

        return {
            "manga_title": manga_title,
            "manga_chapter_names": sanitized_numbers,
            "manga_chapter_links": chapter_urls
        }

    def download_chapters(self, destination, meta, start, end):
        """Download chapter pages into ``destination/<manga title>/<chapter>/``.

        Args:
            destination: existing directory that receives the manga folder.
            meta: dict as returned by ``get_info``.
            start: index of the first chapter to download.
            end: downloading stops after the chapter with this index.

        Pages are saved as ``001.jpg``, ``002.jpg``, ... per chapter.
        """
        print("downloading chapters")
        print(destination, meta, start, end)
        manga_title = meta["manga_title"]
        manga_chapter_names = meta["manga_chapter_names"]

        if not os.path.exists(destination):
            # This branch used to print "Path already exists".
            print("Path does not exist")
            return
        if os.path.isfile(destination):
            print("Path is a file!")
            return

        manga_dir = os.path.join(destination, manga_title)
        if not os.path.exists(manga_dir):
            # manga_dir already contains destination; joining it again broke
            # relative destinations.
            os.mkdir(manga_dir)

        chrome = Chrome(chrome_options=ChromeOptions())
        try:
            for index, chapter_link in enumerate(
                    meta["manga_chapter_links"]):
                if index < start:
                    continue

                print(chapter_link)
                chapter_dir = os.path.join(manga_dir,
                                           manga_chapter_names[index])

                if not os.path.exists(chapter_dir):
                    os.mkdir(chapter_dir)

                chrome.get(chapter_link)
                page_count = len(
                    chrome.find_elements_by_css_selector(
                        "#page-list option"))

                for ix in range(1, page_count + 1):
                    print(f'getting { chapter_link }/{ ix }')
                    chrome.get(f'{ chapter_link }/{ ix }')

                    download_link = chrome.find_element_by_css_selector(
                        '.scan-page').get_attribute("src")

                    urllib.request.urlretrieve(
                        download_link,
                        os.path.join(chapter_dir, f'{ix:03}.jpg'))

                # Checked after the download, so chapter ``end`` is included.
                if index >= end:
                    break
        finally:
            # Shut the browser down even when a download fails.
            chrome.quit()
Пример #20
0
    def scrape(self):
        """Log in on the page at ``self.url`` and collect running activities.

        Returns:
            dict keyed by each activity's subtitle text, with a tuple
            ``(activity link text, distance, duration, calories)`` as value.
            Only activities whose link text contains 'Running' are kept.
            Any exception is printed and whatever was collected so far is
            returned; the browser is always shut down.
        """
        options = ChromeOptions()
        options.add_argument("--headless")
        options.add_argument("--disable-gpu")
        options.add_argument('--ignore-certificate-errors')
        options.add_argument("--window-size=1920,1080")

        driver = Chrome(options=options)
        driver.get(self.url)

        run_dict = {}

        try:
            print('scraping runkeeper')
            main_page = driver.current_window_handle
            login = driver.find_element_by_xpath(
                '/html/body/cset/div[1]/div/ul/li[7]/a')

            login.click()
            print('scrape logging in')

            # Pick the window that is NOT the main one.  The comparison used
            # to be against the literal 'main page', which is always unequal.
            # Defaulting to the main window keeps a same-window login working.
            login_page = main_page
            for handle in driver.window_handles:
                print(handle)
                if handle != main_page:
                    login_page = handle

            driver.switch_to.window(login_page)
            time.sleep(5)

            try:
                email_id = driver.find_element_by_xpath(
                    '//*[@id="com.fitnesskeeper.runkeeper.pro:id/login-a_email"]'
                )
            except Exception:
                email_id = driver.find_element_by_name('a_email')

            email_id.send_keys(os.environ.get('email_address'))

            password = driver.find_element_by_name('a_password')
            password.send_keys(os.environ.get('runkeeper_pwd'))
            time.sleep(5)

            try:
                # Raw string: same characters as before, without the
                # invalid-escape warning for "\.", "\:" and "\/".
                sub_button = driver.find_element_by_id(
                    r'com\.fitnesskeeper\.runkeeper\.pro\:id\/login-oneasics-login'
                )
            except Exception:
                sub_button = driver.find_element_by_xpath(
                    '//*[@id="com.fitnesskeeper.runkeeper.pro:id/login-oneasics-login"]'
                )

            # Clicked through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click()", sub_button)
            print('scrape loggedg in')
            time.sleep(2)
            driver.switch_to.window(main_page)

            # TODO: the fixed 30 s pause could become an explicit wait.
            time.sleep(30)

            # Try three locators for the activity feed control, in order.
            try:
                first = driver.find_element_by_xpath(
                    '//*[@id="pageWrapper"]/div[2]/div[1]/div/div[3]/div/div[2]/div[3]/div[1]/div[1]/div[1]/div[2]/ul'
                )
            except Exception as e:
                print(e)
                try:
                    first = driver.find_element_by_xpath(
                        '/html/body/div[3]/div[2]/div[1]/div/div[3]/div/div[2]/div[3]/div[1]/div[1]/div[2]'
                    )
                except Exception as e1:
                    print(e1)
                    first = driver.find_element_by_class_name(
                        'feedArrow.clickable')

            first.click()

            try:
                a_list = driver.find_element_by_id(
                    'ui-accordion-activityHistoryMenu-panel-0')
            except Exception:
                a_list = driver.find_element_by_xpath(
                    '//*[@id="activityHistoryMenu"]')

            lines = a_list.find_elements_by_tag_name('li')
            print('before looping lines')

            for i, _ in enumerate(lines):
                # The first entry is read as-is; later ones are looked up
                # again and clicked before their details are read.
                if i >= 1:
                    a_list = driver.find_element_by_id(
                        'ui-accordion-activityHistoryMenu-panel-0')
                    a_list.find_elements_by_tag_name('li')[i].click()

                date = driver.find_element_by_class_name(
                    'micro-text.activitySubtitle')
                activity_list = driver.find_element_by_id(
                    'activityHistoryMenu')
                first_activity_div = activity_list.find_element_by_class_name(
                    'selected')
                first_activity = first_activity_div.find_element_by_tag_name(
                    'a')
                dist = driver.find_element_by_id('totalDistance')
                duration = driver.find_element_by_id('totalDuration')
                cals = driver.find_element_by_id('totalCalories')

                if 'Running' in first_activity.text:
                    # Each total's value is the second line of its text.
                    run_dict[date.text] = (
                        first_activity.text,
                        dist.text.split('\n')[1],
                        duration.text.split('\n')[1],
                        cals.text.split('\n')[1],
                    )

        except Exception as e:
            print('exception', e)

        finally:
            driver.quit()

        return run_dict
Пример #21
0
def fill_timesheet() -> None:
    """Log in to the ERP, open the latest timesheet and keep it open.

    If the first timesheet row has the "new" status, its edit button is
    clicked.  If it has the "old" status, the create-card button is clicked.
    The browser then stays open for 900 seconds (15 minutes), after which a
    logout is attempted.  Selenium errors are logged rather than raised, and
    the driver is always quit.
    """
    options = ChromeOptions()
    driver = Chrome(executable_path=environ.get("CHROME_DRIVER"),
                    options=options)
    driver.maximize_window()
    wait = WebDriverWait(driver, 60)

    # Element locators are read from timesheet_selectors.env.
    selectors = Config(
        RepositoryEnv(path.join(FILE_DIRECTORY, "timesheet_selectors.env")))
    try:
        logger.info("Opening ERP link in browser")
        driver.get(config("ERP_LINK"))
        logger.info("Waiting for login button to be clickable")
        wait.until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, selectors("login_button"))))
        logger.info("Entering username")
        driver.find_element_by_id(selectors("username_input")) \
            .send_keys(config("USERNAME"))
        logger.info("Entering password")
        driver.find_element_by_id(selectors("password_input")) \
            .send_keys(config("PASSWORD"))
        logger.info("Clicking login button")
        driver.find_element_by_css_selector(selectors("login_button")).click()

        logger.info("Waiting for left expand menu to be clickable")
        wait.until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, selectors("left_expand_menu"))))
        logger.info("Clicking timesheet expand menu")
        driver.find_element_by_css_selector(selectors("left_expand_menu"))\
            .click()
        logger.info("Waiting for timesheet submenu to be visible")
        wait.until(
            EC.visibility_of_element_located(
                (By.CSS_SELECTOR, selectors("timesheet_menu"))))
        logger.info("Clicking timesheet expand menu")
        driver.find_element_by_css_selector(selectors("timesheet_menu"))\
            .click()
        logger.info("Waiting for recent timecard to be clickable")
        wait.until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, selectors("recent_timecard_link"))))
        logger.info("Clicking recent timecard link")
        driver.find_element_by_css_selector(selectors("recent_timecard_link"))\
            .click()
        logger.info("Waiting for timesheet table to load")
        wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, selectors("timesheet_first_row"))))
        logger.info("Get first row of timesheet table")
        timesheet_first_row = driver.find_element_by_css_selector(
            selectors("timesheet_first_row"))
        logger.info("Getting each data in first row of timesheet")
        first_row_data = timesheet_first_row.find_elements_by_tag_name(
            selectors("first_row_data"))

        # Cell 1 of the first row holds the status; cell 7 holds the edit
        # button.
        status = first_row_data[1].text
        if status == selectors("timesheet_status_new"):
            logger.info("Editing the timesheet")
            first_row_data[7].find_element_by_css_selector(
                selectors("edit_timesheet_button")).click()
        elif status == selectors("timesheet_status_old"):
            logger.info("Creating a new timesheet")
            # The button used to be located but never clicked, so no new
            # timesheet was opened.
            driver.find_element_by_id(selectors("create_card_button"))\
                .click()

        logger.info("Going to sleep for 900 seconds")
        sleep(900)
        logger.info("Wakey wakey!!")

        logger.info("Checking if browser is running")
        if driver.title:
            logger.info("Browser is still open. Trying to find logout button")
            driver.find_element_by_css_selector(selectors("logout_button"))\
                .click()
    except NoSuchElementException as e:
        logger.error("Element not found %s ", e, exc_info=True)
    except TimeoutException as e:
        logger.error("Tired of waiting %s", e, exc_info=True)
    except WebDriverException as e:
        logger.error("Webdriver crash %s", e, exc_info=True)
    finally:
        logger.info("Closing driver. Peace out.")
        driver.quit()
Пример #22
0
def pytest_runtest_setup(item):
    """Rebuild the module-level ``options`` with the language for *item*.

    ``--lang=es`` is used for ``test_spanish_language`` and ``--lang=fr``
    for every other test function.
    """
    global options

    if item.function.__name__ == "test_spanish_language":
        language_to_use = "es"
    else:
        language_to_use = "fr"

    fresh_options = ChromeOptions()
    fresh_options.add_argument(f"--lang={language_to_use}")
    options = fresh_options
Пример #23
0
def chrome_driver():
    """Create and return a Chrome driver with a 15 second implicit wait.

    The ``detach`` experimental option is enabled so the browser can stay
    open for debugging.
    """
    detached_options = ChromeOptions()
    detached_options.add_experimental_option("detach", True)

    session = Chrome(options=detached_options)
    session.implicitly_wait(15)
    return session
Пример #24
0
    def post(self):

        req_parser = reqparse.RequestParser()
        req_parser.add_argument('url', type=str, required=True)
        args = req_parser.parse_args()

        url = args['url']
        if not url:
            return {
                'url': url,
                'x5sec': '',
            }

        option = ChromeOptions()
        # option.add_argument('--headless')
        option.add_argument('--no-sandbox')
        # option.add_argument('--proxy-server=http://HD3P6R2K3912I09D:[email protected]:9020')
        option.add_argument(
            'user-agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36"')
        option.add_argument('--disable-dev-shm-usage')
        option.add_argument('--disable-extensions')
        option.add_argument('--disable-gpu')
        option.add_argument("--disable-features=VizDisplayCompositor")
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        option.add_experimental_option("useAutomationExtension", False)
        # option.binary_location = '/root/Downloads/login_taobao/node_modules/puppeteer/.local-chromium/linux-672088/chrome-linux/chrome'
        wd = Chrome(options=option, executable_path='chromedriver')
        # wd = Chrome(ChromeDriverManager().install(), options=option)
        wd.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                               Object.defineProperty(navigator, 'webdriver', {
                                 get: () => undefined
                               });
                               Object.defineProperty(navigator, 'language', {
	                             get: () => "zh-CN"
                               });
                               Object.defineProperty(navigator, 'deviceMemory', {
	                             get: () => 8
                               });
                               Object.defineProperty(navigator, 'hardwareConcurrency', {
	                             get: () => 8
                               });
                               Object.defineProperty(navigator, 'platform', {
	                             get: () => 'MacIntel'
                               });
                               Object.defineProperty(navigator, 'userAgent', {
                                 get: () => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
                               });
                               Object.defineProperty(navigator, 'plugins', {
                                 get: () => [1, 2, 3, 4, 5]
                               });
                             """
        })
        wd.set_page_load_timeout(20)
        _timeout = WebDriverWait(wd, 20)
        try:
            x5sec = ''
            wd.get(url)
            wd.implicitly_wait(10)
            wd.delete_all_cookies()

            # todo 多页面并行实现
            cnt = 0
            while True:
                time.sleep(0.2)
                wd.find_element_by_id("nc_1_n1z").click()
                slid_ing = wd.find_element_by_id("nc_1_n1z")
                ActionChains(wd).click_and_hold(on_element=slid_ing).perform()
                time.sleep(0.2)
                lgh = 0
                try:
                    while lgh <= 510:
                        lgh += random.randint(30, 50)
                        ActionChains(wd).move_by_offset(xoffset=lgh, yoffset=0).perform()
                    time.sleep(0.2)
                    ActionChains(wd).release().perform()

                except:
                    time.sleep(0.2)
                    ActionChains(wd).release().perform()

                try:
                    slide_refresh = wd.find_element_by_xpath("//div[@id='nocaptcha']/div/span/a")
                    slide_refresh.click()
                except:
                    break
                cnt += 1
                if cnt > 10:
                    break
            cookies = wd.get_cookies()
            wd.close()
            for x5sec_data in cookies:
                if 'x5sec' in x5sec_data.values():
                    x5sec = x5sec_data['value']
            return {
                'x5sec': x5sec,
            }
        except:
            wd.close()
            return {
                'url': url,
                'x5sec': '',
            }
Пример #25
0
    def run(self, url):
        """Download all segments of the video behind ``url`` and merge them into one file.

        Steps, in order:
          1. Fetch ``url`` and read the title from the ``partName:"..."`` field.
          2. Open the page through the ``jiexi.380k.com`` resolver in headless
             Chrome and poll until the player exposes the playlist URL.
          3. Fetch the playlist and collect every line containing ``mp4.ts``.
          4. Download each segment into a temporary directory named after the
             title, rewriting the concat task file after every segment.
          5. Merge the segments and remove the temporary directory and task file.

        Progress is reported through ``self.callback``; ``self.total`` and
        ``self.index`` are reset before the download starts.

        Raises:
            IndexError: if the title pattern is not found in the page, or the
                resolver page contains no iframe.
        """
        self.checkDir()
        self.total = 1
        self.index = 0

        self.callback('data', title='拉取数据')
        rep = requests.get(
            url,
            headers={
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36',
                'Host': url.split('/')[2]
            })
        title = re.findall('partName:"(.*)",serialno', rep.text)[0]

        option = ChromeOptions()
        option.add_argument('headless')
        option.add_experimental_option('excludeSwitches',
                                       ['enable-automation'])
        driver = Chrome(options=option)
        try:
            driver.get("http://jiexi.380k.com/?url=" + url)
            iframe = driver.find_elements_by_tag_name("iframe")[0]
            driver.switch_to.frame(iframe)

            _url = None
            while not _url:
                try:
                    iframe = driver.find_elements_by_tag_name("iframe")[0]
                    driver.switch_to.frame(iframe)
                    _url = driver.execute_script("return dp.option.video.url[0];")
                except Exception:
                    # The player is not ready yet; try again after the pause.
                    # Only Exception is swallowed so Ctrl-C can still stop the wait.
                    pass
                time.sleep(1)
        finally:
            # Always shut the browser down, even when resolving fails.
            driver.quit()

        # Drop the last path component of the playlist URL; "//" is masked so
        # the scheme separator is not split.
        urlPre = _url.replace("//", "||").split('/')
        urlPre.pop()
        base_url = "/".join(urlPre).replace("||", "//")
        rep = requests.get(
            _url,
            headers={
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36',
                'Host': _url.split('/')[2],
                'Referer': 'https://mg.4119.tv/'
            })

        videos = [
            base_url + '/' + filename
            for filename in rep.text.split('\n')
            if 'mp4.ts' in filename
        ]

        self.total = len(videos)
        self.index = 0
        self.callback('data', title='拉取数据成功')
        self.callback('data', title='准备下载')

        taskContent = ""
        tempPath = self.savepath + sep + title
        self.checkDir(tempPath)
        output = self.savepath + sep + title + '.' + self.getVideoFormat()
        for index, segment_url in enumerate(videos):
            filename = tempPath + sep + \
                str(index) + '.' + self.getVideoFormat()
            taskContent += self.concatContent(filename)
            self.getFileByUrl(segment_url, filename, title + '_' + str(index))
            # The task file is rewritten after every segment, as before.
            self.writeConcatFile(taskContent)

        self.videoMerge(self.taskFile, output, title)

        try:
            shutil.rmtree(tempPath)
            os.remove(self.taskFile)
        except OSError:
            # Cleanup is best effort; leftover temp files are not fatal.
            pass

        self.callback('data')
Пример #26
0
        return False

def versesTotal(abbrev, chapter, token):
    '''
    Request one chapter from bibleapi and return its verse total.

    abbrev and chapter are placed in the request path; token is sent as a
    Bearer token in the Authorization header. The value returned is the
    "verses" entry of the "chapter" object in the JSON reply.
    '''
    endpoint = "https://bibleapi.co/api/verses/ra/{}/{}".format(abbrev, str(chapter))
    auth_header = {"Authorization": "Bearer " + token}
    payload = requests.get(endpoint, headers=auth_header).json()
    return payload["chapter"]["verses"]

# Abbreviations of the books to capture. The list is empty here, so the loop
# below does nothing until entries are added.
book_list = []

# Start Chrome with default options; the commented line shows how to point
# the options at a specific Chrome binary.
opcoes = ChromeOptions()
# opcoes.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
browser = Chrome(chrome_options = opcoes)

for abbrev in book_list:
    error = False

    chapter = 1
    # Ask bibleapi how many chapters this book has.
    # NOTE(review): `token` is not assigned in this excerpt - presumably it is
    # defined earlier in the file; confirm.
    totalChapters = requests.get(
            f"https://bibleapi.co/api/books/{abbrev}/", 
            headers = {"Authorization": "Bearer " + token},
        ).json()["chapters"]
    book = {}

    # Loop over the chapters until the last one or until `error` is set.
    # NOTE(review): nothing visible here advances `chapter` or sets `error`;
    # the rest of the loop body appears to be missing from this excerpt.
    while chapter <= totalChapters and not error:
        print(f"Capturing the chapter {chapter} with ", end = "")
Пример #27
0
def ActionChainsLogin(phone="13247598671", password="156416421727av"):
    """Log in to login.zhipin.com through an already running Chrome and return its cookies.

    The driver attaches to a Chrome instance listening on the debugger address
    ``127.0.0.1:9222``, drags the slider captcha until no ``errloading``
    element is present, then types the credentials and clicks the login
    button.

    Args:
        phone: value typed into the ``tel`` input.
        password: value typed into the ``password`` input.
            NOTE(review): the defaults are the credentials that used to be
            hard-coded in the body; they are kept only for backward
            compatibility and should be supplied by the caller instead.

    Returns:
        The list of cookies reported by the browser right after the login
        button is clicked.
    """
    from selenium.common.exceptions import NoSuchElementException

    chromeOption = ChromeOptions()
    chromeOption.add_argument("--disable-extensions")
    chromeOption.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    browser = webdriver.Chrome(
        executable_path="I:/chromedriver/chromedriver.exe",
        options=chromeOption)
    try:
        # Maximize the window first to make the later steps easier.
        browser.maximize_window()
    except Exception:
        # Best effort: the login can proceed without a maximized window.
        pass
    browser.get("https://login.zhipin.com/")
    time.sleep(2)

    success = False
    while not success:
        # Look the slider up on every attempt: a reference obtained before
        # browser.refresh() is stale afterwards.
        slide_btn = browser.find_element_by_css_selector(
            'span[class="nc_iconfont btn_slide"]')
        # Press and hold the left mouse button on the slider.
        ActionChains(browser).click_and_hold(slide_btn).perform()
        # Restart the travelled distance for each attempt; otherwise a retry
        # would skip the drag entirely.
        count = 1
        step_n = 1
        while count < 300:
            ActionChains(browser).move_by_offset(step_n, 0).perform()
            # Next step to the right: a random distance in [0, 10]
            # (randint includes both ends).
            step_n = randint(0, 10)
            # Once count reaches 300 the slider is taken to be at the far right.
            count = count + step_n
            time.sleep(0.001)
        # Release the left mouse button at the end of the drag.
        ActionChains(browser).release().perform()
        try:
            browser.find_element_by_class_name("errloading")  # present when the slide failed
        except NoSuchElementException:
            success = True  # no error element: the slide succeeded
        else:
            browser.refresh()
            # Same pause as after the initial page load.
            # NOTE(review): assumes the slider needs this time to render again - confirm.
            time.sleep(2)

    tel_input = "form .form-row .ipt-wrap input[type='tel']"
    password_input = "form .form-row .ipt-wrap input[type='password']"
    for css, text in ((tel_input, phone), (password_input, password)):
        # Select whatever is already in the field so typing replaces it.
        time.sleep(1)
        browser.find_element_by_css_selector(css).send_keys(Keys.CONTROL + "a")
        time.sleep(1)
        browser.find_element_by_css_selector(css).send_keys(text)

    # Find the login button by CSS selector and click it.
    browser.find_element_by_css_selector(
        ".sign-pwd .sign-content .form-btn button").click()

    cookies = browser.get_cookies()
    return cookies
def _version_tuple(version):
    """Return the leading numeric components of a dotted version string as ints."""
    numbers = []
    for piece in version.split('.'):
        digits = ''
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        if not digits:
            break
        numbers.append(int(digits))
    return tuple(numbers)


def order(shop=None,
          browser=None,
          lego_set=None,
          order_list=None,
          username=None,
          password=None):
    """
    Fill in LEGO parts to be ordered in LEGO's customer service shop.

    Args:
        shop: shop identifier inserted into the replacement-parts URL.
        browser: ``'chrome'`` starts Chrome; any other value starts Firefox.
        lego_set: set number typed into the product search field.
        order_list: comma separated ``part_no:quantity`` pairs; ``None`` or an
            empty string means nothing to order.
        username: LEGO ID user name; login is attempted only when both
            ``username`` and ``password`` are given.
        password: LEGO ID password.

    Returns:
        None. The browser is left open so the order can be finalized by hand,
        except when the login fails, in which case it is closed.
    """
    electric_part_list = []

    from selenium import webdriver

    from selenium.common.exceptions import NoSuchElementException
    from selenium.common.exceptions import TimeoutException

    from selenium.webdriver import Chrome, Firefox, ChromeOptions
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.select import Select
    from selenium.webdriver.support.wait import WebDriverWait
    from time import sleep

    # The default None (or an empty string) is an empty order, not an error.
    order_list = order_list.split(',') if order_list else []

    shop_url = 'https://wwwsecure.us.lego.com/{shop}/service/replacementparts/sale'.format(
        shop=shop)

    print("Using Selenium version : ", webdriver.__version__)
    print("Browser URL : {url}".format(url=shop_url))

    # detect browser choice #
    if browser == 'chrome':
        opts = ChromeOptions()
        # With selenium version above this one, chrome is closed
        # at the end without the "quit()" method!
        # Here is a fix to detach Chrome from python.
        # Versions are compared numerically: as plain strings '10.0.0' would
        # sort before '2.48.0' and '2.5.0' after it.
        if _version_tuple(webdriver.__version__) > (2, 48, 0):
            print("Apply experimental detach option for Chrome")
            opts.add_experimental_option("detach", True)

        browser = Chrome(chrome_options=opts)
    else:
        browser = Firefox()

    print("Browser capabilities")
    print(browser.capabilities)

    # Selenium can't find some elements otherwise
    browser.maximize_window()

    browser.get(shop_url)

    # will wait up to 5 sec for an ExpectedCondition to succeed,
    # otherwise TimeoutException is raised
    wait = WebDriverWait(browser, 5)

    print("Sometimes they ask you to fill in a survey.")

    try:
        survey_layer = browser.find_element_by_id('ipeL104230')
        survey_layer.send_keys(Keys.ESCAPE)
    except NoSuchElementException:
        print("We're lucky, no survey on the LEGO shop today!")

    print("They want to know how old we are.")
    age_field = wait.until(
        EC.element_to_be_clickable((By.NAME, 'rpAgeAndCountryAgeField')))
    age_field.send_keys('55')
    age_field.send_keys(Keys.RETURN)

    # wait for age_field's DOM element to be removed
    wait.until(EC.staleness_of(age_field))

    # login stuff #
    if username and password:

        print("Let's log in with LEGO ID {user}.".format(user=username))
        login_link = wait.until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR,
                 ".legoid .links > a[data-uitest='login-link']")))
        login_link.click()

        # the login form lives inside its own iframe
        browser.switch_to.frame('legoid-iframe')

        user_input = wait.until(
            EC.element_to_be_clickable((By.ID, 'fieldUsername')))
        user_input.click()
        user_input.send_keys(username)

        passwd_input = wait.until(
            EC.element_to_be_clickable((By.ID, 'fieldPassword')))
        passwd_input.click()
        passwd_input.send_keys(password)

        login_button = browser.find_element_by_id('buttonSubmitLogin')
        login_button.click()

        browser.switch_to.default_content()

        # ensure the user/password are good
        try:
            wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR,
                     ".legoid .links > a[data-uitest='logout-link']")))

            print("login success!")
        except TimeoutException:
            print("login failed!")
            # close the browser and stop here
            browser.quit()
            return

    # product selection #

    print(
        "We need to tell them which set we want to buy parts from: {lego_set}".
        format(lego_set=lego_set))
    setno_field = wait.until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR,
             '.product-search input[ng-model=productNumber]')))

    setno_field.send_keys(lego_set)
    setno_field.send_keys(Keys.RETURN)

    print("Let's scroll the page down a bit, so we can see things better.")
    browser.execute_script("window.scroll(0, 750);")

    print(
        "That's gonna be crazy: {count} elements to order! Let's rock.".format(
            count=len(order_list)))
    print()

    counter = 0
    out_of_stock_counter = 0
    not_in_set_counter = 0
    found_counter = 0
    electric_part_counter = 0

    total_elements = len(order_list)

    for brick in order_list:
        part_no, quantity = brick.split(':')

        counter += 1

        print("- [{counter}/{total_elements}] {qty}x #{pn} ".format(
            qty=quantity,
            pn=part_no,
            counter=counter,
            total_elements=total_elements),
              end='')

        element_field = wait.until(
            EC.element_to_be_clickable((By.ID, 'element-filter')))

        element_field.clear()
        element_field.send_keys(part_no)
        element_field.send_keys(Keys.RETURN)
        sleep(.3)  # seconds

        try:
            add_button = browser.find_element_by_css_selector(
                '.element-details + button')
            if add_button.is_enabled():
                add_button.click()
                sleep(.2)  # seconds
            else:
                out_of_stock_counter += 1
                print("NOTE: item out of stock.")
                continue

            print("Found", end='')
            found_counter += 1

            # set the value for item's quantity drop-down menu
            # (the last select in the bag is used for the item just added)
            amount_select = browser.find_elements_by_css_selector(
                '.bag-item select')[-1]
            Select(amount_select).select_by_visible_text(quantity)

            # ensure the value is correct
            selected = Select(amount_select).first_selected_option

            if quantity != selected.text:
                print("WARNING: Could not select desired quantity. {} != {}".
                      format(quantity, selected.text))
            else:
                print()

        except NoSuchElementException:
            # No add button was found for this part number.
            if is_electric_part(part_no):
                print(
                    "The LEGO Group provides electric part out of set #{set}, see note at the end."
                    .format(set=lego_set))
                electric_part_list.append(part_no)
                electric_part_counter += 1
            else:
                print(
                    "OOOPS! No LEGO part with that number found in set #{set}. :-("
                    .format(set=lego_set))
                not_in_set_counter += 1

    browser.execute_script("window.scroll(0, 0);")
    print()
    print("We're done. You can finalize your order now. Thanks for watching!")
    print()
    print("Statistics :")
    print("- {s} Wanted elements".format(s=total_elements))
    print("- {s} Elements found".format(s=found_counter))
    print("- {s} Elements not in set".format(s=not_in_set_counter))
    print("- {s} Elements out of stock".format(s=out_of_stock_counter))
    print("- {s} Elements of type 'Electric part'".format(
        s=electric_part_counter))

    if electric_part_counter > 0:
        print()
        print(
            "Electric parts you can add to your bag once you've added your order :"
        )

        for item in electric_part_list:
            is_electric_part(item, True)
def main(url, base_url):
    """Fetch the article text for every id returned by ``getdata`` and save it.

    Each id is turned into a mobile article URL (``base_url + id + '/'``),
    loaded in headless Chrome and parsed with lxml. Pages without a timestamp
    element are skipped. When the extracted text equals the text of the
    previously stored article, the same URL is loaded again, up to five more
    times, before it is given up on. Articles with blank text are not stored.
    The collected records are handed to ``saveData``.

    Args:
        url: passed through to ``getdata``.
        base_url: passed to ``getdata`` and used as the prefix of every
            article URL.
    """
    data = []
    gid_lists = getdata(url, base_url)
    # Build the mobile article URL for every id.
    s_url = [base_url + gid + '/' for gid in gid_lists]

    # Selenium configuration.
    option = ChromeOptions()
    prefs = {"profile.managed_default_content_settings.images": 2, 'permissions.default.stylesheet': 2}
    option.add_argument(
        'user-agent="Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Mobile Safari/537.36"')
    option.add_experimental_option("prefs", prefs)
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    header = random.choice(header_list)
    option.add_argument(header)
    # Headless browser.
    option.add_argument('--headless')
    option.add_argument('--disable-gpu')
    driver = Chrome(options=option)

    last_content = ""
    i = 0
    repeat_num = 0
    try:
        # Implicit wait of at most 5 seconds; it is a session-wide setting,
        # so it is set once instead of after every page load.
        driver.implicitly_wait(5)
        # Walk the URLs and collect each article's text and time.
        while i < len(s_url):
            driver.get(s_url[i])
            tree = etree.HTML(driver.page_source)
            # Text nodes of the article paragraphs.
            page_list = tree.xpath('//article//p//text()')
            # Article time.
            ttime = tree.xpath('//div[@class="article-sub"]/span[last()]/text()')
            # Pages without a time element (video content) are skipped.
            if ttime:
                content = ''.join(page_list)
                # Same text as the previous article: load the same URL again,
                # moving on only after more than 5 repeats.
                if last_content == content:
                    repeat_num += 1
                    if repeat_num > 5:
                        i += 1
                        repeat_num = 0
                    continue
                repeat_num = 0
                # Blank articles are not stored.
                if content.strip() == '':
                    i += 1
                    continue
                data.append({
                    'hotspot_data': {
                        'source': '今日头条',
                        'docid': gid_lists[i],
                        'url': s_url[i],
                        'time': ttime[0]
                    },
                    'content': content
                })
                last_content = content
            i += 1
    finally:
        # Shut the browser down even when a page load or parse fails.
        driver.quit()
    saveData(data)
Пример #30
0
def adidasbot(info):
    """Interactively buy a shoe on adidas.com, retrying until the size is found.

    Prompts on stdin for the model code, the size and the retry delay in
    minutes, opens the product page in Chrome and then loops: refresh, pick
    the size, add to bag, fill in the delivery and payment forms, place the
    order. Whenever an element is not found, it waits ``delay`` minutes and
    starts the loop again.

    Args:
        info: mapping read with the keys ``fname``, ``lname``, ``address``,
            ``city``, ``state``, ``zip``, ``phone``, ``email``,
            ``cardnumber``, ``cvv`` and ``expiration``.
    """

    # Asking user for shoe model, size and retry delay
    model = input('Enter Desired Model\nex: BB6168\n')
    size = input('Enter Desired Size\n')
    delay = int(input('Set retry (minutes) delay if Out of Stock\n'))

    # "detach" is set so Chrome stays open the whole time
    options = ChromeOptions()
    options.add_experimental_option("detach", True)
    driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
    # NOTE(review): the timeout is passed as the string '10' - confirm the
    # installed selenium version accepts a string here.
    driver.set_page_load_timeout('10')

    # Shoe URL
    driver.get('https://www.adidas.com/us/ultraboost-shoes/{}.html'.format(model))

    while True:
        try:
            driver.refresh()

            # Checks to see if shoe size is available (raises when the size button is absent)
            driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div/div[3]/div/div[2]/section[1]/div[1]/div[2]/button/span[text()="{}"]'.format(size))

            # Selects shoe size
            print('Size {} found'.format(size))
            driver.find_element_by_xpath('//span[contains(text(), "{}")]'.format(size)).click()

            # Adding shoes to bag
            print('Adding to cart')
            driver.find_element_by_xpath('//span[contains(text(), "Add To Bag")]').click()

            # Proceed to delivery page
            driver.implicitly_wait(10)
            driver.get('https://www.adidas.com/us/delivery')

            # Entering in delivery information
            print('Inputting delivery information')
            driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div/div[2]/div/main/form/div/div[1]/div/div[1]/div/div/div[1]/input').send_keys(info["fname"])
            driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div/div[2]/div/main/form/div/div[1]/div/div[2]/div/div/div[1]/input').send_keys(info["lname"])
            driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div/div[2]/div/main/form/div/div[1]/div/div[3]/div/div/div[1]/input').send_keys(info["address"])
            driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div/div[2]/div/main/form/div/div[1]/div/div[5]/div/div/div[1]/input').send_keys(info["city"])
            driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div/div[2]/div/main/form/div/div[1]/div/div[6]/span/div/div/select/option[text()="{}"]'.format(info["state"])).click()
            driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div/div[2]/div/main/form/div/div[1]/div/div[7]/div/div/div[1]/input').send_keys(info["zip"])
            driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div/div[2]/div/main/form/div/div[4]/div/div[1]/div/div/div[1]/input').send_keys(info["phone"])
            driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div/div[2]/div/main/form/div/div[4]/div/div[2]/div/div/div[1]/input').send_keys(info["email"])

            # Unchecks newsletter box
            driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div/div[2]/div/main/div[7]/div/div/div/label/input').click()

            # Proceed to payment page
            driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div/div[2]/div/main/div[9]/button').click()

            # Entering in payment information
            print('Inputting card information')
            driver.implicitly_wait(30)
            driver.find_element_by_name('card.number').send_keys(info["cardnumber"])
            driver.implicitly_wait(30)
            driver.find_element_by_name('card.cvv').send_keys(info["cvv"])
            driver.implicitly_wait(30)
            driver.find_element_by_xpath('//form/div[4]/div[2]/input').send_keys(info["expiration"])
            
            # Placing the order
            driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div/div[2]/div/main/button').click()
            print('Purchase complete')

            break

        except NoSuchElementException:

            # Any element missing anywhere in the flow above lands here, not
            # only a missing size: wait `delay` minutes, then the loop
            # refreshes the page and starts over.
            print("Cannot find size, Retrying in {} minute(s)".format(delay))
            time.sleep((delay*60))
            pass