Пример #1
0
def browser(config_browser, config_headless_mode, request):
    """Create, configure and yield a Selenium driver, then quit it.

    Written as a generator so it can serve as a setup/teardown fixture:
    everything before ``yield`` is setup, everything after is cleanup.

    :param config_browser: browser name; one of 'chrome', 'firefox' or 'ie'
    :param config_headless_mode: truthy to run Chrome/Firefox headless
        (ignored, with a warning, for IE)
    :param request: unused by this function; kept for the caller's signature
    :raises Exception: if ``config_browser`` is not a supported browser name
    """
    import warnings

    if config_browser == 'chrome':
        options = ChromeOptions()
        options.headless = config_headless_mode
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        # mobile_emulation = {"deviceName": "Nexus 5"}
        # options.add_experimental_option("mobileEmulation", mobile_emulation)
        driver = Chrome(options=options)
    elif config_browser == 'firefox':
        options = FirefoxOptions()
        options.headless = config_headless_mode
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        driver = Firefox(options=options)
    elif config_browser == 'ie':
        if config_headless_mode:
            # Merely instantiating Warning(...) is a no-op; actually emit it.
            warnings.warn("Headless mode is not supported in IE")
        driver = Ie()
    else:
        raise Exception(f'"{config_browser}" is not a supported browser')

    # Guard setup and the yield so the browser process is always released,
    # even when one of the configuration calls below fails.
    try:
        driver.delete_all_cookies()
        driver.set_window_size(1920, 1080)
        # wait_time is not defined in this function; it is expected to be
        # provided by the enclosing module.
        driver.implicitly_wait(wait_time)

        # Return the driver object at the end of setup
        yield driver
    finally:
        # For cleanup, quit the driver
        driver.quit()
Пример #2
0
class Spider:
    """Chrome-driven crawler that stores the page source of numbered pages."""

    def __init__(self, index_url, target_url, page_range):
        """Remember the URLs and page count, then boot the browser.

        :param index_url: page opened before the cookies are installed
        :param target_url: URL prefix; the page number is appended to it
        :param page_range: number of the last page to crawl (inclusive)
        """
        self.index_url, self.target_url = index_url, target_url
        # range() excludes its stop value, so keep "last page + 1".
        self.page_range = page_range + 1
        self.raw_html = []
        self.boot()

    def boot(self):
        """Start Chrome and install the stored cookies."""
        self.driver = Chrome()
        self.driver.start_client()
        self.check_cookie()

    def check_cookie(self):
        """Replace the browser cookies with ``xcookie.cookie_list``.

        Exits the process when the cookie list is empty.
        """
        from xcookie import cookie_list
        if not cookie_list:
            print('please insert cookie!')
            sys.exit()

        chrome = self.driver
        chrome.get(self.index_url)
        time.sleep(8)
        chrome.delete_all_cookies()
        print('clear')
        for cookie in cookie_list:
            chrome.add_cookie(cookie)
        print('Done')

    def crawl(self):
        """Visit pages 1..last and append each page source to ``raw_html``."""
        for page_no in range(1, self.page_range):
            page_url = f'{self.target_url}{page_no}'
            self.driver.get(page_url)
            print(page_url)
            time.sleep(5)
            self.raw_html.append(self.driver.page_source)
Пример #3
0
class BasePage:
    """Base page object that owns a Chrome driver instance."""

    def __init__(self, maximize_window=True, hide_cookie_notice=True):
        """Start Chrome and apply the optional start-up tweaks.

        :param maximize_window: maximize the browser window when truthy
        :param hide_cookie_notice: add the hideCookieNotice cookie when truthy
        """
        self.hide_cookie_notice = hide_cookie_notice
        self.driver = Chrome(driver_path_resolver.resolve_driver_path())
        if self.hide_cookie_notice:
            self._hide_cookie_notice()
        self.driver.implicitly_wait(DEFAULT_IMPLICITLY_WAIT)
        if maximize_window:
            self.driver.maximize_window()
        self.error = None

    def _hide_cookie_notice(self):
        """
        Hide the cookie notice by adding the hideCookieNotice cookie
        """
        LOGGER.info('Started hiding cookie notice')
        if not self.hide_cookie_notice:
            return

        wrong_path = '404'
        # A page must be open before a cookie can be added; the 404 path is
        # the page this code opens for that purpose.
        self.driver.get(BASE_URL + wrong_path)
        LOGGER.debug('Opened 404 page')
        self.driver.delete_all_cookies()
        LOGGER.debug('Deleted all cookies')
        notice_cookie = {'name': 'hideCookieNotice', 'value': '1'}
        self.driver.add_cookie(notice_cookie)
        LOGGER.debug('Added cookie {}'.format(notice_cookie))
        LOGGER.info('Cookie notice hiding finished success')

    def get_last_error(self):
        """Return the ``repr`` of the stored error value."""
        return repr(self.error)
Пример #4
0
def main():
    """Rotate a kiosk-mode Chrome window through the active board URLs.

    The URL list is re-read from ``getActiveList()`` on every pass. Each URL
    is opened, cookies are cleared, and the page stays up for DISPLAY_TIME
    seconds. Pages whose title is not "MyView" must show the ``dak-banner``
    element within the timeout; otherwise the URL is logged and dropped from
    the current list.

    The loop runs until an exception (e.g. KeyboardInterrupt) ends it; the
    driver is always shut down on the way out.
    """
    # Set options for selenium chrome driver
    opt = Options()
    opt.add_argument("--kiosk")
    opt.add_argument("disk-cache-size=0")
    opt.add_experimental_option("useAutomationExtension", False)
    opt.add_experimental_option("excludeSwitches", ["enable-automation"])

    # start the browser
    driver = Chrome(options=opt)
    timeout = 1
    # adapted from https://selenium-python.readthedocs.io/waits.html
    banner_present = expected_conditions.presence_of_element_located(
        (By.ID, "dak-banner"))

    try:
        while True:
            boards = getActiveList()
            if not boards:
                # Nothing to show: avoid spinning at full speed.
                time.sleep(DISPLAY_TIME)
                continue
            # Iterate over a snapshot: removing from the list being iterated
            # would silently skip the URL that follows a removed one.
            for url in list(boards):
                driver.get(url)
                driver.delete_all_cookies()
                # test & remove bad links from rotation
                if driver.title != "MyView":
                    try:
                        WebDriverWait(driver, timeout).until(banner_present)
                    except TimeoutException:
                        logger.error("Myview timed out loading %s", url)
                        boards.remove(url)
                    else:
                        # Only report success when the wait really succeeded.
                        logger.debug("MyView loaded: %s", url)
                time.sleep(DISPLAY_TIME)
    finally:
        # tear down the driver even when the endless loop is interrupted
        driver.quit()
Пример #5
0
class WebDriver:
    """Chrome session wrapper for logging in, entering courses and posting
    announcements."""

    def __init__(self, headless=True):
        """Start Chrome and initialise the session state.

        :param headless: run the browser without a visible window when truthy
        """
        options = Options()
        options.headless = headless
        # ``chrome_options=`` is the deprecated spelling of this keyword and
        # is not accepted by newer Selenium releases; ``options=`` is the
        # supported one.
        self.driver = Chrome(options=options)
        self.in_course = False
        self.driver.implicitly_wait(5)
        self.logged_in = False
        self.courses = {}

    def login(self, username, password):
        """Submit the login form and load the course list into ``courses``.

        :param username: value typed into the element with id 'username'
        :param password: value typed into the element with id 'password'
        """
        self.driver.delete_all_cookies()
        self.driver.get(login_url)
        username_element = self.driver.find_element_by_id('username')
        username_element.send_keys(username)
        passwd_element = self.driver.find_element_by_id('password')
        passwd_element.send_keys(password)
        print('Logging in ... ')
        passwd_element.send_keys(Keys.RETURN)
        # NOTE(review): the flag is set right after submitting the form; the
        # result of the login is not checked here.
        self.logged_in = True
        print('Succesfully logged in.')
        raw_courses = self.driver.find_element_by_class_name('coursefakeclass')
        raw_courses_list = raw_courses.find_elements_by_xpath('li')
        for raw_course in raw_courses_list:
            raw_link = raw_course.find_element_by_xpath('a')
            description = raw_link.text
            url = raw_link.get_attribute('href')
            course = Course(description, url)
            self.courses[course.name] = course
        print('Succesfully loaded courses.')

    def enter_course(self, course_name):
        """Open the page of a previously loaded course.

        :param course_name: key into ``courses``
        :raises KeyError: if no course with that name was loaded
        """
        course = self.courses[course_name]
        self.driver.get(course.urls['course'])
        self.in_course = True
        name = course.department + ' ' + course.crn + '.' + course.section
        print('Entered course: ' + name + '.')

    def create_announcement(self, subject, announcement):
        """Fill in and submit the announcement form of the current course.

        Only prints a message when no course has been entered.

        :param subject: text for the subject field
        :param announcement: text typed into the message editor iframe
        """
        if self.in_course:
            self.driver.find_element_by_xpath('//*[@id="nav"]/li/a').click()
            subject_field = self.driver.find_element_by_xpath(
                '//*[@id="subject"]')
            subject_field.send_keys(subject)
            # The message body lives inside an iframe, so switch into it to
            # type and back out before pressing the submit button.
            iframe = self.driver.find_element_by_xpath(
                '//*[@id="messagetext_ifr"]')
            self.driver.switch_to.frame(iframe)
            html_input = self.driver.find_element_by_xpath('html/body')
            html_input.send_keys(announcement)
            self.driver.switch_to.default_content()
            button = self.driver.find_element_by_class_name('submit')
            button.click()
            print('Succesfully created and posted announcement.')

        else:
            print('You are not in any course. Can`t make the announcement.')

    def quit(self):
        """Shut the browser session down."""
        self.driver.quit()
Пример #6
0
def main_fun():
    """Run ``hh_worker`` in a headless Chrome session.

    Exceptions raised by the worker are printed together with their
    traceback and not re-raised. The browser is always shut down afterwards.
    """
    options = Options()
    options.add_argument("headless")
    options.add_argument("disable-gpu")
    options.add_argument("no-sandbox")
    browser = Chrome(options=options)
    try:
        browser.set_window_size(1280, 1024)
        browser.maximize_window()
        browser.delete_all_cookies()
        try:
            hh_worker(browser)
        except Exception as ex:
            print(ex)
            traceback.print_exc()
    finally:
        # If cookie cleanup or close() fails (for instance because the
        # browser already died), quit() must still run so that the driver
        # process is not leaked.
        try:
            browser.delete_all_cookies()
            browser.close()
        finally:
            browser.quit()
Пример #7
0
def loop_tab_based(driver: webdriver.Chrome, project: Project):
    """Vote for ``project`` repeatedly until enough successes are counted.

    Each round clears cookies, opens the project URL, clicks the
    ``confirmVote`` element and waits up to 10 seconds for ``voteComplete``
    to appear. The outcome is recorded through ``logs_maker``; the loop ends
    once ``count_check`` reports at least ``project.times_to_vote`` successes.

    Implemented as a loop rather than self-recursion so that a large vote
    count, or a long run of failures, cannot exhaust the recursion limit.

    :param driver: Chrome driver used for voting
    :param project: object providing ``url`` and ``times_to_vote``
    """
    while True:
        driver.delete_all_cookies()
        driver.get(project.url)

        try:
            driver.find_element_by_class_name("confirmVote").click()
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "voteComplete")))
            logs_maker.success(project)
        except Exception:
            # Any failure counts as a failed vote, but KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare except did.
            logs_maker.fail(project)

        # TODO: count successes on file copy (so write acces wont fail in logs_counter)
        counted_successes = count_check.get_success_num(project.url)
        print(counted_successes, "/", project.times_to_vote)

        if counted_successes >= project.times_to_vote:
            break

    print("Finished!")
Пример #8
0
class S(object):
    """Headless-Chrome automation of a QQ OAuth login.

    Progress is reported through ``dd_notice`` and, on success, the
    ``gdt_token`` / ``gdt_protect`` cookie values are sent the same way.
    """

    def __init__(self):
        """Start headless Chrome with automation hints disabled and store the
        login URL and credentials."""
        # Hard-coded chromedriver location.
        self.path = '/root/.wdm/drivers/chromedriver/80.0.3987.106/linux64/chromedriver'
        option = ChromeOptions()
        option.add_argument('--headless')
        prefs = {
            'profile.default_content_setting_values': {
                'notifications': 2
            }
        }
        option.add_experimental_option('prefs', prefs)
        option.add_argument('--no-sandbox')
        option.add_argument('--disable-dev-shm-usage')
        option.add_argument('--disable-extensions')
        option.add_argument('--disable-gpu')
        option.add_argument("--disable-features=VizDisplayCompositor")
        option.add_experimental_option('excludeSwitches',
                                       ['enable-automation'])
        self.wd = Chrome(options=option, executable_path=self.path)
        # Make navigator.webdriver read as undefined on every new document
        self.wd.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument", {
                "source":
                """
                           Object.defineProperty(navigator, 'webdriver', {
                             get: () => undefined
                           })
                         """
            })
        self.wd.set_page_load_timeout(20)
        # NOTE(review): despite its name this holds a WebDriverWait object;
        # it is not used anywhere in the visible part of the class.
        self.timeout = WebDriverWait(self.wd, 20)
        self.url = 'https://graph.qq.com/oauth2.0/show?which=Login&display=pc&response_type=code&client_id=101477621&redirect_uri=https%3A%2F%2Fsso.e.qq.com%2Fpassport%3Fsso_redirect_uri%3Dhttps%253A%252F%252Fe.qq.com%252Fads%252F%26service_tag%3D1&scope=get_user_info'
        self.users = 'xxx'
        self.passwd = 'xxx'

    def run(self):
        """Perform the login and report the resulting cookies.

        Steps: fill in the password login form, handle an optional QR-code
        verification, handle an optional slider captcha, then read the
        cookies and send them (or a failure message) through ``dd_notice``.
        """
        self.wd.get(self.url)
        self.wd.implicitly_wait(10)
        self.wd.delete_all_cookies()
        time.sleep(2)

        # The login form lives inside the first iframe of the page.
        iframe = self.wd.find_element_by_xpath('//iframe')
        self.wd.switch_to.frame(iframe)

        self.wd.find_element_by_id('switcher_plogin').click()
        time.sleep(1)
        self.wd.find_element_by_id('u').clear()
        time.sleep(1)
        self.wd.find_element_by_id('u').send_keys(self.users)
        time.sleep(2)
        self.wd.find_element_by_id('p').clear()
        time.sleep(1)
        self.wd.find_element_by_id('p').send_keys(self.passwd)
        time.sleep(2)
        self.wd.find_element_by_id('login_button').click()
        time.sleep(5)
        # QR-code verification: any exception in this section is reported as
        # "no QR code needed".
        try:
            tips = self.wd.find_element_by_id('qlogin_tips_2').text
            if '由于你的帐号存在异常,需要进行手机验证,' in tips:
                # Repeat until the gdt_token cookie shows up.
                while True:
                    dd_notice('需要扫描二维码...', dd_token_url)
                    time.sleep(2)
                    self.wd.save_screenshot('qrImg.png')
                    # The screenshot is opened and written back to the same
                    # file name.
                    im = Image.open('qrImg.png')
                    im.save('qrImg.png')
                    time.sleep(30)
                    # The response of this request is discarded.
                    requests.get(
                        'https://e.qq.com/atlas/8944022/admanage/campaign',
                        verify=False)
                    time.sleep(2)
                    if 'gdt_token' in json.dumps(self.wd.get_cookies()):
                        dd_notice('二维码验证成功!!!', dd_token_url)
                        break
                    else:
                        dd_notice('二维码验证失败!重试中...', dd_token_url)
        except Exception as e:
            dd_notice('不需要二维码验证!', dd_token_url)

        # Slider captcha: any exception in this section is reported as
        # "no slider needed".
        try:
            # NOTE(review): the only break is on success; when the guide text
            # does not match, the loop simply runs again, so it ends either
            # on success or through an exception.
            while True:
                time.sleep(3)
                iframe = self.wd.find_element_by_xpath('//iframe')
                self.wd.switch_to.frame(iframe)
                time.sleep(1)
                flags = self.wd.find_element_by_xpath(
                    '//*[@id="guideText"]').text
                if '拖动下方滑块完成拼图' == flags:
                    dd_notice('需要滑块!!!', dd_token_url)
                    # Download the captcha background for gap detection.
                    src_url = self.wd.find_element_by_xpath(
                        '//*[@id="slideBg"]').get_attribute('src')
                    res = requests.get(url=src_url, verify=False)
                    with open('crack.jpeg', 'wb') as f:
                        f.write(res.content)
                    time.sleep(3)
                    slid_ing = self.wd.find_element_by_id(
                        'tcaptcha_drag_button')
                    ActionChains(
                        self.wd).click_and_hold(on_element=slid_ing).perform()
                    time.sleep(0.2)
                    position = qq_mark_detect('crack.jpeg').x.values[0]
                    # presumably 280/680 rescales image pixels to on-page
                    # pixels and 23 is an offset of the drag button - TODO
                    # confirm
                    real_position = position * (280 / 680) - 23
                    track_list = self.get_track(int(real_position))
                    # Replay the track as a series of small mouse moves.
                    for track in track_list:
                        ActionChains(self.wd).move_by_offset(
                            xoffset=track, yoffset=0).perform()
                        time.sleep(0.002)
                    ActionChains(self.wd).release().perform()
                    time.sleep(2)
                    # The response of this request is discarded.
                    requests.get(
                        'https://e.qq.com/atlas/8944022/admanage/campaign',
                        verify=False)
                    time.sleep(2)
                    print(self.wd.get_cookies())
                    if 'gdt_token' in json.dumps(self.wd.get_cookies()):
                        dd_notice('滑块验证成功!!!', dd_token_url)
                        break
                    else:
                        dd_notice('滑块验证验证失败!重试中...', dd_token_url)
                else:
                    dd_notice('不需要滑块!!!', dd_token_url)
        except Exception as e:
            dd_notice('不需要滑块!', dd_token_url)
        cookies_data = self.wd.get_cookies()
        try:
            # Both cookie names must appear somewhere in the serialized
            # cookie list before the values are extracted.
            if 'gdt_token' in json.dumps(
                    cookies_data) and 'gdt_protect' in json.dumps(
                        cookies_data):
                cookies = {}
                for data in cookies_data:
                    if 'gdt_protect' in data.values():
                        gdt_protect = data.get('value')
                        if gdt_protect:
                            cookies['gdt_protect'] = gdt_protect
                    if 'gdt_token' in data.values():
                        gdt_token = data.get('value')
                        if gdt_token:
                            cookies['gdt_token'] = gdt_token
                dd_notice(f'获取的cookies: {cookies}', dd_token_url)
                time.sleep(2)
                self.close()
            else:
                dd_notice('未成功获取cookies, 需手动重试!!!!!', dd_token_url)
                self.close()
        except Exception as e:
            dd_notice('广点通自动化登陆失败!!!需手动重试!!!!!', dd_token_url)
            self.close()

    @staticmethod
    def get_track(distance):
        """
        Simulate a movement track, pretending that a human is operating.

        Builds per-step offsets by accelerating until 7/8 of ``distance`` and
        decelerating afterwards, overshooting by 10, then appends eight
        negative offsets that move back.

        :param distance: distance to cover
        :return: list of integer step offsets
        """
        v = 0
        t = 0.2
        tracks = []
        current = 0
        mid = distance * 7 / 8

        # Overshoot the target; the negative steps appended below move back.
        distance += 10
        while current < distance:
            if current < mid:
                a = random.randint(2, 4)
            else:
                a = -random.randint(3, 5)

            v0 = v
            # Displacement for one time step under constant acceleration.
            s = v0 * t + 0.5 * a * (t**2)
            current += s
            tracks.append(round(s))

            v = v0 + a * t

        for i in range(4):
            tracks.append(-random.randint(2, 3))
        for i in range(4):
            tracks.append(-random.randint(1, 3))
        return tracks

    def close(self):
        """Close the current browser window.

        NOTE(review): this calls ``close()``, not ``quit()``; confirm whether
        the driver session is meant to stay alive.
        """
        self.wd.close()
Пример #9
0
class BoxDriver(object):
    """
    a simple usage of selenium framework tool
    """
    """
    私有全局变量
    """
    _web_driver = None
    _by_char = None
    _wait_seconds = None
    """
    构造方法
    """
    class DriverType(Enum):
        """Browser back ends that can be launched."""
        # No trailing commas here: "CHROME = 1," would make the member value
        # the one-element tuple (1,) instead of the integer 1.
        CHROME = 1
        FIREFOX = 2
        IE = 3
        SAFARI = 4
        CHROME_HEADLESS = 5

    def __init__(self,
                 driver_type: DriverType,
                 by_char=_CHARACTER_COMMA,
                 wait_seconds=_WAIT_SECONDS,
                 firefox_profile=None):
        """
        Constructor: store the settings and start the requested browser.

        :param driver_type: DriverType member selecting the browser;
            None or "" falls back to DriverType.CHROME
        :param by_char: separator between locator kind and value in selectors
        :param wait_seconds: pause length used by the forced waits
        :param firefox_profile: Firefox profile, handed to the driver setup
        """
        self._by_char, self._wait_seconds = by_char, wait_seconds

        chosen_type = driver_type
        if chosen_type is None or chosen_type == "":
            chosen_type = self.DriverType.CHROME

        self._set_selenium_driver(chosen_type, firefox_profile)

    def _set_selenium_driver(self, driver_type, firefox_profile):
        """
        Create the WebDriver matching driver_type and keep it in _web_driver.

        An unknown driver_type starts Chrome and prints a notice.

        :param driver_type: DriverType member
        :param firefox_profile: profile path; only used for Firefox, and only
            when the path exists
        """
        kinds = self.DriverType

        if driver_type == kinds.CHROME:
            self._web_driver = Chrome()
        elif driver_type == kinds.FIREFOX:
            has_profile = firefox_profile and os.path.exists(firefox_profile)
            if not has_profile:
                self._web_driver = Firefox()
            else:
                loaded_profile = FirefoxProfile(firefox_profile)
                self._web_driver = Firefox(firefox_profile=loaded_profile)
        elif driver_type == kinds.IE:
            self._web_driver = Ie()
        elif driver_type == kinds.SAFARI:
            self._web_driver = Safari()
        elif driver_type == kinds.CHROME_HEADLESS:
            headless_options = ChromeOptions()
            headless_options.add_argument('headless')
            headless_options.add_experimental_option(
                "excludeSwitches", ["ignore-certificate-errors"])
            self._web_driver = Chrome(options=headless_options)
        else:
            self._web_driver = Chrome()
            print("Invalid Driver Type filled: %r" % driver_type)

    """
    私有方法
    """

    def _convert_selector_to_locator(self, selector):
        """
        Convert a custom selector string into a Selenium locator tuple.

        A selector has the form "<kind><separator><value>", e.g. "i, xxx".
        A selector without the separator is treated as an element id.

        :param selector: locator string such as "i, xxx" or
            "x,//*[@id='langs']/button"
        :return: (By strategy, value) tuple
        :raises NameError: if the kind prefix is not recognised
        """
        if self._by_char not in selector:
            return By.ID, selector

        # Split on the FIRST separator only, so a value that itself contains
        # the separator (XPath function arguments, CSS selector lists) is
        # kept whole instead of being cut at its first comma.
        selector_by, selector_value = (
            part.strip() for part in selector.split(self._by_char, 1))

        strategies = {
            "i": By.ID,
            "id": By.ID,
            "n": By.NAME,
            "name": By.NAME,
            "c": By.CLASS_NAME,
            "class_name": By.CLASS_NAME,
            "l": By.LINK_TEXT,
            "link_text": By.LINK_TEXT,
            "p": By.PARTIAL_LINK_TEXT,
            "partial_link_text": By.PARTIAL_LINK_TEXT,
            "t": By.TAG_NAME,
            "tag_name": By.TAG_NAME,
            "x": By.XPATH,
            "xpath": By.XPATH,
            "s": By.CSS_SELECTOR,
            "css_selector": By.CSS_SELECTOR,
        }
        strategy = strategies.get(selector_by)
        if strategy is None:
            raise NameError(
                "Please enter a valid selector of targeting elements.")

        return strategy, selector_value

    def _locate_element(self, selector):
        """
        Find the first element matching a selector.

        :param selector: selector string, e.g. "i,xxx" or
            "x,//*[@id='langs']/button"
        :return: the element returned by the driver's find_element
        :raises NameError: if the selector converts to no locator
        """
        locator = self._convert_selector_to_locator(selector)
        if locator is None:
            raise NameError(
                "Please enter a valid locator of targeting elements.")

        return self._web_driver.find_element(*locator)

    def _locate_elements(self, selector):
        """
        Find every element matching a selector.

        :param selector: selector string, e.g. "i,xxx" or
            "x,//*[@id='langs']/button"
        :return: the result of the driver's find_elements
        :raises NameError: if the selector converts to no locator
        """
        locator = self._convert_selector_to_locator(selector)
        if locator is None:
            raise NameError(
                "Please enter a valid locator of targeting elements.")

        return self._web_driver.find_elements(*locator)

    """
    cookie 相关方法
    """

    def clear_cookies(self):
        """
        Delete all cookies of the current browser session.
        """
        self._web_driver.delete_all_cookies()

    def add_cookies(self, cookies):
        """
        Add a cookie given as a dict.

        The argument is passed as ``cookie_dict`` to a single ``add_cookie``
        call on the driver.

        :param cookies: cookie dict
        :return: None
        """
        self._web_driver.add_cookie(cookie_dict=cookies)

    def add_cookie(self, cookie_dict):
        """
        Add a single cookie, replacing any existing cookie of the same name.

        :param cookie_dict: dict with the two keys "name" and "value"
        :return: None
        """
        browser = self._web_driver
        name = cookie_dict["name"]
        # Drop a previous cookie with this name before adding the new one.
        if browser.get_cookie(name) is not None:
            browser.delete_cookie(name)

        browser.add_cookie(cookie_dict)

    def remove_cookie(self, name):
        """
        Remove the cookie with the given name if it exists.

        :param name: cookie name
        :return: None
        """
        browser = self._web_driver
        # Only delete when the cookie is actually present.
        if browser.get_cookie(name) is not None:
            browser.delete_cookie(name)

    """
    浏览器本身相关方法
    """

    def refresh(self, url=None):
        """
        Reload the current page, or load url when one is given, then pause.

        :param url: page to load; None (the default) refreshes the current page
        :return: None
        """
        if url is not None:
            self._web_driver.get(url)
        else:
            self._web_driver.refresh()

        self.forced_wait(self._wait_seconds)

    def maximize_window(self):
        """
        Maximize the current browser window.
        :return: None
        """
        self._web_driver.maximize_window()

    def navigate(self, url):
        """
        Open a URL, then pause for the configured wait time.
        :param url: address to load
        :return: None
        """
        self._web_driver.get(url)
        self.forced_wait(self._wait_seconds)

    def quit(self):
        """
        Quit the driver.
        :return: None
        """
        self._web_driver.quit()

    def close_browser(self):
        """
        Close the browser by calling close() on the driver.
        :return: None
        """
        self._web_driver.close()

    """
    基本元素相关方法
    """

    def type(self, selector, text):
        """
        Clear an input element and type text into it.

        Usage:
        driver.type("i,el","selenium")
        """
        field = self._locate_element(selector)
        field.clear()
        field.send_keys(text)

    def click(self, selector):
        """
        Click the element found by selector, then pause.

        Usage:
        driver.click("i,el")
        """
        self._locate_element(selector).click()
        self.forced_wait(self._wait_seconds)

    def click_by_enter(self, selector):
        """
        Send the ENTER key to the element found by selector, then pause.

        Usage:
        driver.click_by_enter("i,el")
        """
        self._locate_element(selector).send_keys(Keys.ENTER)
        self.forced_wait(self._wait_seconds)

    def click_by_text(self, text):
        """
        Click the element located through the "p" (partial link text)
        selector built from text, then pause.

        Usage:
        driver.click_by_text("新闻")
        """
        text_selector = 'p%s' % self._by_char + text
        link = self._locate_element(text_selector)
        link.click()
        self.forced_wait(self._wait_seconds)

    def submit(self, selector):
        """
        Call submit() on the element found by selector, then pause.

        Usage:
        driver.submit("i,el")
        """
        self._locate_element(selector).submit()
        self.forced_wait(self._wait_seconds)

    def move_to(self, selector):
        """
        Move the mouse pointer onto the element found by selector, then pause.
        :param selector: selector string
        :return: None
        """
        hover_target = self._locate_element(selector)
        hover = ActionChains(self._web_driver).move_to_element(hover_target)
        hover.perform()
        self.forced_wait(self._wait_seconds)

    def right_click(self, selector):
        """
        Context-click (right mouse button) the element found by selector,
        then pause.
        :param selector: selector string
        :return: None
        """
        click_target = self._locate_element(selector)
        context = ActionChains(self._web_driver).context_click(click_target)
        context.perform()
        self.forced_wait(self._wait_seconds)

    def count_elements(self, selector):
        """
        Count the elements matching selector.
        :param selector: selector string
        :return: number of matching elements
        """
        return len(self._locate_elements(selector))

    def drag_element(self, source, target):
        """
        Drag one element onto another, then pause.
        :param source: selector of the element to drag
        :param target: selector of the element to drop onto
        :return: None
        """
        origin = self._locate_element(source)
        destination = self._locate_element(target)
        browser = self._web_driver

        if not browser.w3c:
            # Non-W3C drivers get the drag as three separate actions.
            ActionChains(browser).click_and_hold(origin).perform()
            ActionChains(browser).move_to_element(destination).perform()
            ActionChains(browser).release(destination).perform()
        else:
            ActionChains(browser).drag_and_drop(origin, destination).perform()

        self.forced_wait(self._wait_seconds)

    def lost_focus(self):
        """
        Press and release TAB so the current element loses focus, then pause.
        :return: None
        """
        tab_press = ActionChains(self._web_driver).key_down(Keys.TAB)
        tab_press.key_up(Keys.TAB).perform()
        self.forced_wait(self._wait_seconds)

    """
    <select> 元素相关
    """

    def select_by_index(self, selector, index):
        """
        Choose an option of a <select> element by its index, then pause.

        Usage:
        driver.select_by_index("i,el", 0)
        """
        dropdown = Select(self._locate_element(selector))
        dropdown.select_by_index(index)
        self.forced_wait(self._wait_seconds)

    def get_selected_text(self, selector):
        """
        Return the text of the selected option of a <select> element.
        :param selector: selector string, e.g. "i, xxx"
        :return: text of the first selected option
        """
        el = self._locate_element(selector)
        # first_selected_option is a property of Select; calling it would
        # try to call the returned WebElement and raise TypeError.
        selected_opt = Select(el).first_selected_option
        return selected_opt.text

    def select_by_visible_text(self, selector, text):
        """
        Choose an option of a <select> element by its visible text, then pause.

        Usage:
        driver.select_by_visible_text("i,el", "text")
        """
        dropdown = Select(self._locate_element(selector))
        dropdown.select_by_visible_text(text)
        self.forced_wait(self._wait_seconds)

    def select_by_value(self, selector, value):
        """
        Choose an option of a <select> element by its value, then pause.

        Usage:
        driver.select_by_value("i,el", "value")
        """
        dropdown = Select(self._locate_element(selector))
        dropdown.select_by_value(value)
        self.forced_wait(self._wait_seconds)

    """
    JavaScript 相关
    """

    def execute_js(self, script):
        """
        Execute a JavaScript snippet through the driver, then pause.

        The value produced by the script is not returned.

        Usage:
        driver.execute_js("window.scrollTo(200,1000);")
        """
        self._web_driver.execute_script(script)

        self.forced_wait(self._wait_seconds)

    """
    元素属性相关方法
    """

    def get_value(self, selector):
        """
        Return the "value" attribute of the element found by selector.
        :param selector: selector string
        :return: the attribute as returned by get_attribute
        """
        return self._locate_element(selector).get_attribute("value")

    def get_attribute(self, selector, attribute):
        """
        Return the named attribute of the element found by selector.

        Usage:
        driver.get_attribute("i,el","type")
        """
        return self._locate_element(selector).get_attribute(attribute)

    def get_text(self, selector):
        """
        Return the text of the element found by selector.

        Usage:
        driver.get_text("i,el")
        """
        return self._locate_element(selector).text

    def get_displayed(self, selector):
        """
        Return the is_displayed() result of the element found by selector.

        Usage:
        driver.get_displayed("i,el")
        """
        return self._locate_element(selector).is_displayed()

    def get_selected(self, selector):
        """
        Return the is_selected() result of the element found by selector.
        :param selector: selector string
        :return: True or False
        """
        return self._locate_element(selector).is_selected()

    def get_text_list(self, selector):
        """
        Collect the text of every element matching selector.
        :param selector: selector string
        :return: list of element texts, in the order the elements were found
        """
        return [found.text for found in self._locate_elements(selector)]

    """
    窗口相关方法
    """

    def accept_alert(self):
        """
        Accept the currently open alert, then pause.

        Usage:
        driver.accept_alert()
        """
        open_alert = self._web_driver.switch_to.alert
        open_alert.accept()
        self.forced_wait(self._wait_seconds)

    def dismiss_alert(self):
        """
        Dismiss the currently open alert, then pause.

        Usage:
        driver.dismiss_alert()
        """
        open_alert = self._web_driver.switch_to.alert
        open_alert.dismiss()
        self.forced_wait(self._wait_seconds)

    def switch_to_frame(self, selector):
        """
        Switch the driver into the frame found by selector, then pause.

        Usage:
        driver.switch_to_frame("i,el")
        """
        frame_element = self._locate_element(selector)
        self._web_driver.switch_to.frame(frame_element)
        self.forced_wait(self._wait_seconds)

    def switch_to_default(self):
        """
        Switch back to the default content of the page, then pause.
        Counterpart of the switch_to_frame() method.

        Usage:
        driver.switch_to_default()
        """
        self._web_driver.switch_to.default_content()

        self.forced_wait(self._wait_seconds)

    def switch_to_parent(self):
        """
        Switch to the parent frame, then pause.
        :return: None
        """
        self._web_driver.switch_to.parent_frame()

        self.forced_wait(self._wait_seconds)

    def switch_to_window_by_title(self, title):
        """
        Switch through the open windows until one has the given title.

        When no window matches, every window has been visited and the driver
        stays on the last one.

        :param title: exact window title to look for
        :return: None
        """
        browser = self._web_driver
        for window_handle in browser.window_handles:
            browser.switch_to.window(window_handle)
            if browser.title == title:
                break

            browser.switch_to.default_content()
            self.forced_wait(self._wait_seconds)

    def open_new_window(self, selector):
        """
        Click the element found by selector and switch to the first window
        handle that differs from the current one.

        Usage:
        driver.open_new_window("i,el")
        """
        browser = self._web_driver
        origin_handle = browser.current_window_handle
        self._locate_element(selector).click()

        other_handle = next(
            (handle for handle in browser.window_handles
             if handle != origin_handle),
            None)
        if other_handle is not None:
            browser.switch_to.window(other_handle)

    def save_window_snapshot(self, file_name):
        """
        Save a screenshot of the window to a file, then pause.
        :param file_name: the image file name and path
        :return: None
        """
        self._web_driver.save_screenshot(file_name)
        self.forced_wait(self._wait_seconds)

    def save_window_snapshot_by_png(self):
        """
        Return the driver's get_screenshot_as_png() result.
        """
        return self._web_driver.get_screenshot_as_png()

    def save_element_snapshot_by_png(self, selector):
        """
        Element screenshot: locate the element, pause, then return its
        screenshot_as_png value.
        :param selector: selector string
        :return: the element's screenshot_as_png value
        """
        el = self._locate_element(selector)
        self.forced_wait(self._wait_seconds)
        return el.screenshot_as_png

    def save_window_snapshot_by_io(self):
        """
        Capture the current window as a base64-encoded screenshot.
        :return: the value returned by the driver's get_screenshot_as_base64()
        """
        encoded = self._web_driver.get_screenshot_as_base64()
        return encoded

    def save_element_snapshot_by_io(self, selector):
        """
        Capture a single element as a base64-encoded screenshot.
        :param selector: selector string understood by ``_locate_element``
        :return: the element's ``screenshot_as_base64`` value
        """
        target = self._locate_element(selector)
        encoded = target.screenshot_as_base64
        return encoded

    """
    等待方法
    """

    @staticmethod
    def forced_wait(seconds):
        """
        强制等待
        :param seconds:
        :return:
        """
        time.sleep(seconds)

    def implicitly_wait(self, seconds):
        """
        Set the driver's implicit wait, which applies to every element
        lookup on the page.
        :param seconds: wait time in seconds

        Usage:
        driver.implicitly_wait(10)
        """
        driver = self._web_driver
        driver.implicitly_wait(seconds)

    def explicitly_wait(self, selector, seconds):
        """
        Explicit wait: block until the element is present in the DOM.
        :param selector: selector string, converted to a locator by
            ``_convert_selector_to_locator``
        :param seconds: maximum time to wait, in seconds
        :return:
        """
        locator = self._convert_selector_to_locator(selector)
        waiter = WebDriverWait(self._web_driver, seconds)
        is_present = expected_conditions.presence_of_element_located(locator)
        waiter.until(is_present)

    def get_explicitly_wait_element_text(self, selector, seconds):
        """
        Explicit wait: wait for the element to be found and return its text.
        :param selector: selector string, converted to a locator by
            ``_convert_selector_to_locator``
        :param seconds: maximum time to wait, in seconds
        :return: str, element.text; None if the waited-for value is falsy or
            is not a WebElement
        """
        locator = self._convert_selector_to_locator(selector)
        waiter = WebDriverWait(self._web_driver, seconds)

        def _find(d):
            return d.find_element(*locator)

        found = waiter.until(_find)
        if not found or not isinstance(found, WebElement):
            return None

        return found.text

    """
    属性
    """

    @property
    def current_title(self):
        '''
        Title of the window the driver is currently focused on.

        Usage:
        driver.current_title
        '''
        driver = self._web_driver
        return driver.title

    @property
    def current_url(self):
        """
        URL address of the page the driver is currently on.

        Usage:
        driver.current_url
        """
        driver = self._web_driver
        return driver.current_url
Пример #10
0
    def run(self):
        """
        Download and unpack the next chromedriver version if it is not
        already on disk.

        Steps, all best-effort (every failure is printed, none is raised):
          1. Build the paths of the current and next driver under the
             "ChromeDriver" folder next to this file.
          2. If the next version's chromedriver.exe is missing, start a
             headless Chrome with the current driver, open the chromedriver
             storage index for the next version and click through to the
             win32 zip so Chrome downloads it into the driver folder.
          3. Extract the zip into the next version's folder, delete the zip,
             and delete the previous version's folder.

        NOTE(review): `chrome_browser_version`, `nextVersion`, `lastVersion`
        and `event` are not defined in this method; presumably module-level
        names (with `event` offering a `wait(seconds)` call) - confirm.
        """
        try:
            import os

            driverName = "\\chromedriver.exe"

            # defining base file directory of chrome drivers
            driver_loc = os.path.dirname(
                os.path.abspath(__file__)) + "\\ChromeDriver\\"

            # defining the file path of your exe file automatically updating based on your browsers current version of chrome.
            currentPath = driver_loc + chrome_browser_version + driverName

            # check if new version of drive exists --> only continue if it doesn't
            Newpath = driver_loc + nextVersion

            # check if we have already downloaded the newest version of the browser
            newfileloc = Newpath + driverName
            newpathexists = os.path.exists(newfileloc)

            if newpathexists == False:
                try:
                    # open chrome driver and attempt to download new chrome driver exe file.

                    # set the arguments and options
                    chromeOptions = Options()
                    # Downloads go straight into the driver folder without a
                    # prompt; image loading is disabled.
                    chromeOptions.add_experimental_option(
                        "prefs",
                        {
                            "download.default_directory": driver_loc,
                            "download.prompt_for_download": False,
                            "download.directory_upgrade": True,
                            "safebrowsing.enabled": True,
                            "profile.managed_default_content_settings.images":
                            2,
                        },
                    )
                    chromeOptions.add_experimental_option(
                        "excludeSwitches", ["enable-logging"])
                    chromeOptions.add_argument("--headless")
                    chromeOptions.add_argument(
                        "--blink-settings=imagesEnabled=false")
                    chromeOptions.add_argument("--disable-popup-blocking")
                    chromeOptions.add_argument("--ignore-certificate-errors")
                    chromeOptions.add_argument("--allow-insecure-localhost")
                    chromeOptions.add_argument(
                        "--allow-running-insecure-content")
                    chromeOptions.accept_untrusted_certs = True
                    chromeOptions.assume_untrusted_cert_issuer = True
                    service_args = ["hide_console"]

                    try:
                        print("~~~Calling Update Driver")
                        update_driver = Chrome(
                            executable_path=currentPath,
                            options=chromeOptions,
                            service_args=service_args,
                        )
                        print("~~~Update Driver Opened")

                        # opening up url of chromedriver to get new version of chromedriver.
                        chromeDriverURL = (
                            "https://chromedriver.storage.googleapis.com/index.html?path="
                            + nextVersion)
                        update_driver.set_page_load_timeout(10)
                        update_driver.delete_all_cookies()
                        update_driver.get(chromeDriverURL)
                        print("~~~Update Website Got")

                        # fixed pause after loading the index page
                        # time.sleep(5)
                        event.wait(5)
                        # find records of table rows
                        table = update_driver.find_elements_by_css_selector(
                            "tr")

                        # check the length of the table
                        Table_len = len(table)

                        # ensure that table length is greater than 4, else fail. -- table length of 4 is default when there are no availble updates
                        if Table_len > 4:

                            # define string value of link: the first 6 characters
                            # of the second-to-last table row's text
                            rowText = table[(len(table) - 2)].text[:6]
                            # time.sleep(1)
                            event.wait(1)

                            # select the value of the row
                            update_driver.find_element_by_xpath(
                                "//*[contains(text()," + '"' + str(rowText) +
                                '"' + ")]").click()
                            event.wait(1)
                            # time.sleep(1)

                            # select chromedriver zip for windows
                            update_driver.find_element_by_xpath(
                                "//*[contains(text()," + '"' + "win32" + '"' +
                                ")]").click()
                            print("~~~Download Started")

                            # fixed 5 second pause before quitting; nothing here
                            # checks that the download actually finished
                            # time.sleep(5)
                            event.wait(5)
                            update_driver.quit()

                            print("~~~Update Driver Exited")

                            try:
                                from zipfile import ZipFile
                                import shutil

                                fileName = os.path.join(
                                    os.path.dirname(driver_loc),
                                    "chromedriver_win32.zip",
                                )

                                # Create a ZipFile Object and load sample.zip in it
                                with ZipFile(fileName, "r") as zipObj:
                                    # Extract all the contents of zip file in different directory
                                    zipObj.extractall(Newpath)

                                print("~~~Newer Version Extracted")
                            except Exception as ex:
                                print(
                                    "Error in extracting:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                                    .format(type(ex).__name__, ex.args))

                            # NOTE(review): the steps below still run when the
                            # extraction above failed, so the zip and the old
                            # driver folder can be removed without a new driver
                            # having been unpacked.
                            try:
                                # delete downloaded file
                                os.remove(fileName)
                                print("Downloaded Zip Deleted")
                            except Exception as ex:
                                print(
                                    "~~~Error in deleting zip:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                                    .format(type(ex).__name__, ex.args))

                            # defining old chrome driver location
                            oldPath = driver_loc + lastVersion
                            oldpathexists = os.path.exists(oldPath)

                            # this deletes the old folder with the older version of chromedriver in it
                            if oldpathexists == True:
                                try:
                                    # NOTE(review): `stat` is imported but not
                                    # used anywhere in this method.
                                    import stat

                                    shutil.rmtree(oldPath, ignore_errors=True)

                                    print("Old Version Deleted")
                                except Exception as ex:
                                    print(
                                        "~~~Error in deleting previous version:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                                        .format(type(ex).__name__, ex.args))

                        else:
                            # update_driver.quit()
                            print("~~~No new version available")

                    except Exception as ex:
                        print(
                            "~~~Error in update driver:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                            .format(type(ex).__name__, ex.args))

                    finally:
                        # NOTE(review): on the success path the driver was
                        # already quit above, so this is a second quit() call.
                        # If Chrome() itself raised, `update_driver` is unbound
                        # here and the resulting NameError is what gets printed.
                        try:
                            # close the driver
                            update_driver.quit()
                            print("~~~Update Driver Exited")
                        except Exception as ex:
                            print(
                                "~~~Error in quitting:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                                .format(type(ex).__name__, ex.args))
                except Exception as ex:
                    print(
                        "~~~Error in if:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                        .format(type(ex).__name__, ex.args))
            else:
                print("~~~ChromeDriver Upto Date~~~")

        except Exception as ex:
            print(
                "~~~Error in update:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                .format(type(ex).__name__, ex.args))
Пример #11
0
    def run(self):
        """
        Periodically check whether the portal is logged in and sign in if not.

        Loops forever.  Each pass starts a fresh headless Chrome, loads the
        checking site and then looks for, in order:
          1. an element of the "site blocked" page,
          2. an element of the real page (meaning we are online),
          3. the login form, which is then filled in and submitted.

        The driver is quit at the end of every pass, including the pass
        where the login attempt fails.  A pass that ends in an error is
        retried after 60 seconds, a normal pass after 300 seconds.  All
        errors are printed; none is raised to the caller.

        NOTE(review): `event`, `chrome_browser_version`, `Options` and
        `Chrome` are not defined in this method; presumably module-level
        names (with `event` offering a `wait(seconds)` call) - confirm.
        """
        # Bound up front so the cleanup handlers never hit an unbound name.
        driver = None
        try:

            # set the arguments and options
            chromeOptions = Options()
            prefs = {"profile.managed_default_content_settings.images": 2}
            chromeOptions.add_experimental_option("prefs", prefs)
            chromeOptions.add_experimental_option("excludeSwitches",
                                                  ["enable-logging"])
            chromeOptions.add_argument("--headless")
            chromeOptions.add_argument("--blink-settings=imagesEnabled=false")
            chromeOptions.add_argument("--disable-popup-blocking")
            chromeOptions.add_argument("--ignore-certificate-errors")
            chromeOptions.add_argument("--allow-insecure-localhost")
            chromeOptions.add_argument("--allow-running-insecure-content")
            chromeOptions.accept_untrusted_certs = True
            chromeOptions.assume_untrusted_cert_issuer = True
            service_args = ["hide_console"]
            currentPath = (os.path.dirname(os.path.abspath(__file__)) +
                           "\\ChromeDriver\\" + chrome_browser_version +
                           "\\chromedriver.exe")

            while True:
                # Forget the previous pass's (already quit) driver so the
                # error handlers below only ever act on this pass's driver.
                driver = None
                try:
                    print("\nCalling Driver")

                    # Creating an instance of chrome
                    driver = Chrome(
                        executable_path=currentPath,
                        options=chromeOptions,
                        service_args=service_args,
                    )
                    print("Driver Called")
                    driver.set_page_load_timeout(10)
                    driver.delete_all_cookies()

                    # open a page
                    driver.get("Enter Checking Site Here")
                    print("Getting Site")
                    try:
                        # remove this try except if the your wifi doesn't
                        # block websites

                        # xpath if the website is blocked
                        driver.find_element_by_xpath(
                            "Enter xpath to an element in the blocked page")
                        print("Site Blocked\n")

                    except Exception:
                        try:
                            # xpath to any thing in the website to make sure you are connected to the internet
                            driver.find_element_by_xpath(
                                "/Enter xpath to an element in the page")
                            print("Site Opening\n")

                        except Exception:
                            try:
                                # if your portal doesn't have auto redirect,
                                # uncomment the following line and type in
                                # your login url

                                # driver.get("Paste Login Webiste URL Here")

                                # change the ids to those in your login website
                                # you can use developer mode to find the id of fields (use ctrl + shift + i)
                                # change the username and password to the required one
                                print("Trying To Login")
                                # select username field
                                element = driver.find_element_by_id(
                                    "Ending id of user input field")
                                print("User Found")
                                element.send_keys("Enter username")
                                print("User Inputted")
                                # select password field
                                element = driver.find_element_by_id(
                                    "Ending id of password input field")
                                print("Passwprd Found")
                                element.send_keys("Enter password")
                                print("Password Inputted")
                                # select submit button
                                element = driver.find_element_by_id(
                                    "Enter id of submit button")
                                print("Button Found")
                                element.click()
                                print("Logged In\n")
                            except Exception as ex:
                                print(
                                    "Can't login:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                                    .format(type(ex).__name__, ex.args))
                                # `continue` skips the normal quit at the end
                                # of the pass, so quit here or every failed
                                # login leaves a Chrome process behind.
                                try:
                                    driver.quit()
                                except Exception as quit_ex:
                                    print(
                                        "Error in Quitting:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                                        .format(
                                            type(quit_ex).__name__,
                                            quit_ex.args))
                                event.wait(60)
                                continue

                except Exception as ex:
                    print(
                        "Error in loop:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                        .format(type(ex).__name__, ex.args))
                    # driver is None when Chrome() itself failed.
                    if driver is not None:
                        try:
                            driver.quit()
                        except Exception as quit_ex:
                            print(
                                "Error in Quitting:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                                .format(type(quit_ex).__name__, quit_ex.args))

                    event.wait(60)
                    continue

                try:
                    driver.quit()
                except Exception as ex:
                    print(
                        "Error in Quitting in loop:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                        .format(type(ex).__name__, ex.args))
                event.wait(300)

        except Exception as ex:
            print(
                "Error outside loop:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                .format(type(ex).__name__, ex.args))

        finally:
            try:
                if driver is not None:
                    driver.quit()
            except Exception as ex:
                print(
                    "Error in Quitting in final:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                    .format(type(ex).__name__, ex.args))
            finally:
                print("Login Thread Exited")
Пример #12
0
    # Fragment of a larger function (its `def` line is not part of this
    # sample): start Chrome with a stored user profile, open the inventory
    # page and fill in the login form when the "login done" element is not
    # found.
    # NOTE(review): `config`, `printLog` and `getElementByXpathUntilTimeout`
    # are defined elsewhere; their exact contracts are not visible here.
    opts = Options()
    opts.add_argument(
        "--user-data-dir=" +
        config["activeProfilePath"])  # add user data to chrome-data folder
    # opts.add_argument("user-data-dir=C:\\Users\\AtechM_03\\AppData\\Local\\Google\\Chrome\\User Data\\Profile 2")
    # Run without a visible window unless the config asks for the interface.
    if (config["showInterface"] != "true"):
        # NOTE(review): headless is requested twice here, via set_headless()
        # and via the 'headless' argument.
        opts.set_headless()
        opts.add_argument('headless')
        opts.add_argument('--disable-infobars')
        opts.add_argument('--disable-dev-shm-usage')
        opts.add_argument('--no-sandbox')
        # opts.add_argument('--remote-debugging-port=9222')
    # print(config["webDataPath"])
    browser = Chrome(options=opts)
    # browser.implicitly_wait(20)
    browser.delete_all_cookies()
    # browser = Chrome()
    browser.get(config["inventoryUrl"])
    printLog("Open inventory manager")
    # time.sleep(1)
    # The helper is called with the "login done" xpath and the value 3
    # (presumably a timeout in seconds - confirm); a False result is treated
    # as "not logged in yet".
    if (getElementByXpathUntilTimeout(browser, config["loginDoneId"],
                                      3) == False):

        # The e-mail field is optional: any failure to fill it is skipped.
        try:
            browser.find_element_by_xpath(config["emailId"]).send_keys(
                config["activeEmail"])
        except:
            print("skip email!")
        browser.find_element_by_xpath(config["passwordId"]).send_keys(
            config["activePassword"])
        browser.find_element_by_xpath(config["rememberLoginId"]).click()
Пример #13
0
class Crawler:
    """
    Crawl Google result pages for LinkedIn profile links and hand each
    profile's HTML to a profile manager.

    The crawler drives a single browser instance, sleeps for random
    intervals between page loads, and restarts the browser with randomized
    options whenever it takes a break.
    """

    def __init__(self, profile_manager, driver_path, config):
        """
        :param profile_manager: A ProfileManager object with all profiles loaded already
        :param driver_path: The path to the browser driver that is being used for scraping
        :param config: A CrawlerConfig object
        """

        # Parameters
        self.cfg = config
        self.driver_path = driver_path

        # Controls
        self.profile_manager = profile_manager
        self.crawled_urls = []
        self.profiles_since_break = 0
        self.browser = None
        self.current_agent = None

    def run(self):
        """ This runs in a new thread when crawler.run() is called """

        self._start_browser()

        # Start crawling each URL
        for url in self.cfg.websites:
            print("Starting Crawling on Seed: ", url)
            self._crawl_page(url)

        self._close_browser()

    def _crawl_page(self, url):
        """
        If the url is a linkedin url, it will check if it is a profile, save it, and add it to crawled URL's.

        If the url is NOT a linkedin url (thus a google search page), it will find linkedin URL's and crawl them.
        It will also find "next search page" arrows on google, and crawl those as well.

        :param url: The link to crawl
        :return:
        """

        if url in self.crawled_urls:
            print("Tried to crawl the same URL twice in one session", url)
            return

        self.crawled_urls.append(url)

        if self.profiles_since_break > randint(*self.cfg.urls_between_break):
            self._take_break(
                self.cfg.sleep_random_break, "Taking break after " +
                str(self.profiles_since_break) + " profiles !")

        # If it is a linkedin profile currently being crawled, save the HTML
        if "linkedin" in url and "/in/" in url and url.count("/") == 4:

            # Check that this profile has not been parsed before
            username = url.split("/in/", 1)[1]
            if username in self.profile_manager.users:
                print("Already crawled: ", username)
                return

            print("Analyzing Profile #", len(self.profile_manager), url,
                  username)

            # Load the page
            html = self._load_page(url)
            self.profiles_since_break += 1

            # If linkedin rate-limited us, continue to the next profile
            if html is None:
                return

            # Save HTML to a file
            # TODO: Fix this
            try:
                self.profile_manager.write_new_html_profile(html)
            except Exception as e:
                print("ERROR: While saving HTML: ", e, " in url: ", url)

        elif "www.google.com" in url:
            # Load the page
            html = self._load_page(url)

            # _load_page returns None after a timeout or a rate-limit break;
            # there is nothing to parse then, and BeautifulSoup would raise.
            if html is None:
                return

            # If it is a google search page currently being crawled to find more linkedin profiles
            linkedin_urls = self._get_results_urls(html)
            shuffle(linkedin_urls)
            for result_url in linkedin_urls:
                self._crawl_page(result_url)

            # Get the "Next" link to go to the next page of results
            soup = BeautifulSoup(html, "lxml")
            next_link = soup.find("a", {"id": "pnnext"})
            if next_link is not None:
                next_link = urljoin("http://www.google.com", next_link["href"])
                self._crawl_page(next_link)
        else:
            print("ERROR: Tried to crawl bad url: ", url)

    def _get_results_urls(self, html):
        """
        Collect the text of every <cite class="_Rm"> element in a google
        results page.

        :param html: Page source of the search results page
        :return: A list with the text of each matching element
        """
        soup = BeautifulSoup(html, "lxml")
        return [link.text for link in soup.find_all('cite', class_="_Rm")]

    def _load_page(self, url):
        """
        Load a url in the browser, sleep, and return the page source.

        :param url: The link to load
        :return: The page source, or None if the load timed out or linkedin
                 served its rate-limiting page (a break is taken in both cases)
        """
        # Selenium reports page-load timeouts with its own exception type,
        # which is not a subclass of the builtin TimeoutError.
        from selenium.common.exceptions import TimeoutException

        # Load the page
        try:
            self.browser.get(url)
            html = self.browser.page_source
        except (TimeoutError, TimeoutException):
            self._take_break(self.cfg.sleep_timeout,
                             "Page has timed out. Taking a long break!")
            return None

        # Sleep for a certain amount of time
        if "www.google.com" in url:
            self._sleep(self.cfg.sleep_google_search, "Google Search")
        else:
            self._sleep(self.cfg.sleep_linkedin, "Linkedin Profile")

        # Check that linkedin didn't rate limit us
        if "Join to view full profiles for free" in html:
            self._take_break(self.cfg.sleep_rate_limiting,
                             "Linkeding is rate-limiting us. Taking a break.")
            return None

        return html

    def _sleep(self, interval, reason):
        """
        A sleep helper function that prints the reason its sleeping, and the random interval of sleep
        :param interval: A tuple (mintime, maxtime)
        :param reason: Why the sleep is happening (for a pretty print)
        """

        rand_interval = uniform(*interval)
        print("Sleeping", rand_interval, "seconds: ", reason)

        # If the browser isn't open, do a normal sleep
        if self.browser is None:
            sleep(rand_interval)
            return

        # The browser is open: sleep halfway, do a random action, then
        # sleep the remaining half
        sleep(rand_interval / 2)

        # Do a random action while sleeping
        actions = [
            self.browser.back, lambda: self.browser.set_window_size(
                randint(700, 1080), randint(700, 1080)),
            lambda: self.browser.set_window_position(randint(0, 300),
                                                     randint(0, 300)),
            self.browser.maximize_window, self.browser.delete_all_cookies
        ]

        action = choice(actions)
        print("Performing random action: ", action)
        action()
        sleep(rand_interval / 2)

    def _take_break(self, break_interval, reason):
        """ Shut down browser, restart with new browser """
        self._close_browser()
        self._sleep(break_interval, reason)
        self.profiles_since_break = 0
        self._start_browser()

    def _close_browser(self):
        """ Quit the browser and mark it as closed """
        self.browser.quit()
        self.browser = None

    def _start_browser(self):
        """
        Start a new browser with a randomized set of options, a random user
        agent and a random window state. Must only be called while no
        browser is open.
        """
        assert self.browser is None, "Browser must not exist in order to call _start_browser!"

        # Load a user profile from normal chrome
        user_profile = "C:\\Users\\Alex Thiel\\AppData\\Local\\Google\\Chrome\\User Data\\Default"

        # Options
        options = Options()
        options.add_argument("user-data-dir={}".format(user_profile))
        options.add_experimental_option("excludeSwitches", [
            "ignore-certificate-errors",
            "safebrowsing-disable-download-protection",
            "safebrowsing-disable-auto-update",
            "disable-client-side-phishing-detection"
        ])
        os.environ["webdriver.chrome.driver"] = self.driver_path

        # Add variation to the browser: each option below is switched on
        # when randint(0, 2) lands on 1
        if randint(0, 2) == 1:
            options.add_argument("--incognito")
            print("Option: Incognito")
        if randint(0, 2) == 1:
            options.add_argument("--disable-extensions")
            print("Option: Disabling Extensions")
        if randint(0, 2) == 1:
            options.add_argument("--disable-plugins-discovery")
            print("Option: Disabling plugins discovery")
        if randint(0, 2) == 1:
            options.add_argument('--no-referrers')
            print("Option: No Referrers")
        if randint(0, 2) == 1:
            options.add_argument('--disable-web-security')
            print("Option: Disabled web security")
        if randint(0, 2) == 1:
            options.add_argument('--allow-running-insecure-content')
            print("Option: Allowing running insecure content")
        if randint(0, 2) == 1:
            options.add_experimental_option(
                'prefs', {
                    'credentials_enable_service': False,
                    'profile': {
                        'password_manager_enabled': False
                    }
                })
            print("Options: Disabled Password Manager")

        # options.add_experimental_option('prefs', {'profile.managed_default_content_settings.images': 2})

        agent = UserAgent().random
        options.add_argument("user-agent=" + agent)
        self.current_agent = agent
        print("Option: Agent:", agent)

        # Open up browser window
        self.browser = Driver(executable_path=self.driver_path,
                              chrome_options=options)
        self.browser.set_page_load_timeout(self.cfg.browser_timeout)
        self.browser.delete_all_cookies()

        if randint(0, 2) == 1:
            print("Option: Start Maximized")
            self.browser.maximize_window()
        else:
            self.browser.set_window_size(randint(700, 1080),
                                         randint(700, 1080))
            self.browser.set_window_position(randint(0, 300), randint(0, 300))
Пример #14
0
    def post(self):
        """
        Solve the slider challenge on the requested page and return the
        resulting ``x5sec`` cookie value.

        Reads the required ``url`` request argument, opens it in a Chrome
        instance with mobile emulation, repeatedly flicks the slider element
        (``nc_1_n1t``), clicking the refresh control between attempts, for
        at most 11 attempts, and then reads the cookies.

        :return: ``{'x5sec': value}`` on completion (``value`` is '' when no
                 such cookie was set); ``{'url': url, 'x5sec': ''}`` when the
                 url is empty or anything fails while driving the page.

        The browser is shut down with ``quit()`` on every path, so neither
        the browser nor the chromedriver process is left running.
        """

        req_parser = reqparse.RequestParser()
        req_parser.add_argument('url', type=str, required=True)
        args = req_parser.parse_args()

        url = args['url']
        if not url:
            return {
                'url': url,
                'x5sec': '',
            }

        option = ChromeOptions()
        # option.add_argument('--headless')
        option.add_argument('--no-sandbox')
        option.add_argument('--disable-dev-shm-usage')
        mobile_emulation = {
            "deviceMetrics": {"width": 375, "height": 667, "pixelRatio": 3},
            "userAgent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372",
        }
        option.add_experimental_option("mobileEmulation", mobile_emulation)
        option.add_experimental_option('w3c', False)
        option.add_argument('--disable-extensions')
        option.add_argument('--disable-gpu')
        option.add_argument("--disable-features=VizDisplayCompositor")
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        option.add_experimental_option("useAutomationExtension", False)
        option.binary_location = '/root/Downloads/login_taobao/node_modules/puppeteer/.local-chromium/linux-672088/chrome-linux/chrome'
        wd = Chrome(options=option, executable_path='/root/Downloads/slider_servers/chromedriver')
        try:
            # Hide the webdriver flag from page scripts on every new document.
            # (A longer script that also spoofed language, platform, plugins
            # etc. used to sit here as a disabled string block.)
            wd.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
                "source": """
            Object.defineProperty(navigator, 'webdriver', {
              get: () => undefined
            })
          """
            })
            wd.set_page_load_timeout(20)
            try:
                x5sec = ''
                wd.get(url)
                wd.implicitly_wait(10)
                wd.delete_all_cookies()
                cnt = 0
                while True:
                    time.sleep(0.4)
                    wd.find_element_by_id("nc_1_n1t").click()
                    slid_ing = wd.find_element_by_id("nc_1_n1t")
                    time.sleep(0.2)
                    try:
                        TouchActions(wd).flick_element(slid_ing, 258, 0, random.randint(200, 300)).perform()
                        time.sleep(0.2)
                    except Exception as e:
                        import traceback
                        print(traceback.format_exc())
                        print(e)
                        time.sleep(0.4)
                    try:
                        slide_refresh = wd.find_element_by_xpath('//*[@id="nc_1-stage-3"]/span[1]/span[1]')
                        slide_refresh.click()
                    except Exception:
                        # No refresh control found: stop retrying.
                        break
                    cnt += 1
                    if cnt > 10:
                        break
                # Match on the cookie's name rather than on any field value.
                for cookie in wd.get_cookies():
                    if cookie.get('name') == 'x5sec':
                        x5sec = cookie['value']
                return {
                    'x5sec': x5sec,
                }
            except Exception:
                return {
                    'url': url,
                    'x5sec': '',
                }
        finally:
            # quit() ends the whole session; close() only closes the window
            # and leaves the chromedriver process running.
            wd.quit()
Пример #15
0
    def post(self):

        req_parser = reqparse.RequestParser()
        req_parser.add_argument('url', type=str, required=True)
        args = req_parser.parse_args()

        url = args['url']
        if not url:
            return {
                'url': url,
                'x5sec': '',
            }

        option = ChromeOptions()
        # option.add_argument('--headless')
        option.add_argument('--no-sandbox')
        # option.add_argument('--proxy-server=http://HD3P6R2K3912I09D:[email protected]:9020')
        option.add_argument(
            'user-agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36"')
        option.add_argument('--disable-dev-shm-usage')
        option.add_argument('--disable-extensions')
        option.add_argument('--disable-gpu')
        option.add_argument("--disable-features=VizDisplayCompositor")
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        option.add_experimental_option("useAutomationExtension", False)
        # option.binary_location = '/root/Downloads/login_taobao/node_modules/puppeteer/.local-chromium/linux-672088/chrome-linux/chrome'
        wd = Chrome(options=option, executable_path='chromedriver')
        # wd = Chrome(ChromeDriverManager().install(), options=option)
        wd.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                               Object.defineProperty(navigator, 'webdriver', {
                                 get: () => undefined
                               });
                               Object.defineProperty(navigator, 'language', {
	                             get: () => "zh-CN"
                               });
                               Object.defineProperty(navigator, 'deviceMemory', {
	                             get: () => 8
                               });
                               Object.defineProperty(navigator, 'hardwareConcurrency', {
	                             get: () => 8
                               });
                               Object.defineProperty(navigator, 'platform', {
	                             get: () => 'MacIntel'
                               });
                               Object.defineProperty(navigator, 'userAgent', {
                                 get: () => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
                               });
                               Object.defineProperty(navigator, 'plugins', {
                                 get: () => [1, 2, 3, 4, 5]
                               });
                             """
        })
        wd.set_page_load_timeout(20)
        _timeout = WebDriverWait(wd, 20)
        try:
            x5sec = ''
            wd.get(url)
            wd.implicitly_wait(10)
            wd.delete_all_cookies()

            # todo 多页面并行实现
            cnt = 0
            while True:
                time.sleep(0.2)
                wd.find_element_by_id("nc_1_n1z").click()
                slid_ing = wd.find_element_by_id("nc_1_n1z")
                ActionChains(wd).click_and_hold(on_element=slid_ing).perform()
                time.sleep(0.2)
                lgh = 0
                try:
                    while lgh <= 510:
                        lgh += random.randint(30, 50)
                        ActionChains(wd).move_by_offset(xoffset=lgh, yoffset=0).perform()
                    time.sleep(0.2)
                    ActionChains(wd).release().perform()

                except:
                    time.sleep(0.2)
                    ActionChains(wd).release().perform()

                try:
                    slide_refresh = wd.find_element_by_xpath("//div[@id='nocaptcha']/div/span/a")
                    slide_refresh.click()
                except:
                    break
                cnt += 1
                if cnt > 10:
                    break
            cookies = wd.get_cookies()
            wd.close()
            for x5sec_data in cookies:
                if 'x5sec' in x5sec_data.values():
                    x5sec = x5sec_data['value']
            return {
                'x5sec': x5sec,
            }
        except:
            wd.close()
            return {
                'url': url,
                'x5sec': '',
            }
Пример #16
0
class InstaBot(object):
    """Selenium-driven Instagram session that can log in, follow users and like posts.

    The counters ``liked``, ``liked_total_samples`` and ``followed`` accumulate
    over the lifetime of the instance.
    """
    base_url = 'https://www.instagram.com'

    def __init__(self, implicit_wait=20, page_load_timeout=30):
        """Start Xvfb (if possible) and a Chrome driver.

        Args:
            implicit_wait: value passed to the driver's ``implicitly_wait``.
            page_load_timeout: value passed to ``set_page_load_timeout``.
        """
        try:
            Xvfb().start()
        except EnvironmentError:
            # Carry on without the virtual display when it cannot be started.
            pass

        options = ChromeOptions()
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-setuid-sandbox')

        self.driver = Chrome(settings.CHROMEDRIVER_PATH,
                             chrome_options=options)
        self.driver.implicitly_wait(implicit_wait)
        self.driver.set_page_load_timeout(page_load_timeout)

        self.wait = WebDriverWait(self.driver, settings.WEB_DRIVER_WAIT_SEC)

        self.liked = 0
        self.liked_total_samples = 0
        self.followed = 0

    def close(self):
        """Close the browser and kill the Xvfb / chromedriver processes.

        Every step is best effort: failures are logged and never raised, and a
        failure to close the browser no longer skips the process cleanup.
        """
        from subprocess import call

        try:
            self.driver.delete_all_cookies()
            self.driver.close()
        except Exception as e:
            logger.warning("Could not close the browser cleanly: %s", e)

        for process_name in ('Xvfb', 'chromedriver'):
            try:
                call(['killall', process_name])
            except OSError as e:
                logger.warning("Could not run killall %s: %s", process_name, e)

    def login(self, username=None, password=None):
        """Log in, falling back to the ``INSTABOT_IG_*`` environment variables.

        Raises:
            InvalidUsernamePasswordError: if either credential is missing.
        """
        username = username or os.environ.get('INSTABOT_IG_USERNAME')
        password = password or os.environ.get('INSTABOT_IG_PASSWORD')

        if not username or not password:
            raise InvalidUsernamePasswordError

        logger.info("Logging in as: %s" % username)

        self.driver.get(self.base_url)
        self.wait.until(EC.element_to_be_clickable(
            (By.XPATH, xpath.login))).click()
        self.driver.find_element_by_name('username').send_keys(username)
        self.driver.find_element_by_name('password').send_keys(password)
        self.driver.find_element_by_xpath(xpath.submit_login).click()

    def follow_users(self, usernames=None):
        """
        Follow all the users (don't pass `@')

        ``usernames`` may be omitted or ``None``, in which case nothing is done.
        Per-user failures are logged and the loop moves on to the next user.
        """
        for username in usernames or []:
            time.sleep(settings.FOLLOW_USER_SLEEP_SEC)
            self.driver.get('%s/%s' % (self.base_url, username))
            try:
                elem = self.wait.until(
                    EC.element_to_be_clickable((By.XPATH, xpath.follow)))
                if elem.text.lower() != 'following':
                    elem.click()
                    self.followed += 1
                    logger.info("Started following %s" % username)
                else:
                    logger.info("Already following %s" % username)

            except NoSuchElementException as e:
                logger.info(e)

            except Exception as e:
                logger.error(e)

    def like_tags(self, tags, num=100):
        """
        Like `num' number of posts when exploring hashtag (don't pass `#')

        A random sample of posts will be liked for a given tag
        Return the usernames of the posts liked
        """
        usernames = []
        for tag in tags:
            time.sleep(settings.LIKE_TAG_SLEEP_SEC)
            logger.info("Liking posts with tag: %s" % tag)
            self.driver.get('%s/explore/tags/%s/' % (self.base_url, tag))
            time.sleep(settings.LIKE_TAG_SLEEP_SEC)
            # Floor division keeps the count an int; `num / 10` is a float on
            # Python 3 and range() rejects it.
            self._load_more(max(1, num // 10))

            # get the actual url's of images to like
            try:
                main = self.driver.find_element_by_tag_name('main')
            except NoSuchElementException as e:
                logger.info(e)
                continue

            links = main.find_elements_by_tag_name('a')
            urls = [link.get_attribute('href') for link in links]

            sample = random.sample(urls, min(num, len(urls)))
            self.liked_total_samples += len(sample)
            logger.info("Like sample size: %d" % len(sample))
            for url in sample:
                time.sleep(settings.LIKE_TAG_SLEEP_SEC)
                try:
                    self.driver.get(url)
                    elem = self.driver.find_element_by_link_text('Like')
                    username = self.driver.find_element_by_xpath(
                        xpath.profile_username).text

                    elem.click()
                    self.liked += 1
                    usernames.append(username)

                except NoSuchElementException as e:
                    logger.info(e)

            logger.info("Liked %d/%d" % (self.liked, self.liked_total_samples))

        return usernames

    def _load_more(self, n=10):
        """
        Press "end" key `n' times to load more images
        """
        body = self.driver.find_element_by_tag_name('body')
        for _ in range(n):
            body.send_keys(Keys.END)
            time.sleep(settings.LOAD_MORE_SLEEP_SEC)
Пример #17
0
# WebDriverWait.until() signals a timeout with selenium's TimeoutException,
# which is unrelated to the builtin TimeoutError.
from selenium.common.exceptions import TimeoutException

driver = Chrome(ChromeDriverManager().install(), options=opts)

try:
    for _ in range(ran):
        driver.get(ank)
        try:
            # Step 1: wait for the code field, fill it in and submit.
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, xent)))
            ent = driver.find_element_by_xpath(xent)
            ent.send_keys(code)
            ent.submit()

            # Step 2: wait for the message field, submit it, then wait for the
            # element located by `xtest` to appear.
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, xent1)))
            ent1 = driver.find_element_by_xpath(xent1)
            ent1.send_keys(mes)
            ent1.submit()
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, xtest)))
        except (TimeoutException, TimeoutError):
            # An expected element never showed up: stop submitting.
            break
        # Drop the cookies between submissions.
        driver.delete_all_cookies()
finally:
    # Always shut the browser down, including on unexpected errors. Failures
    # while quitting are ignored on purpose, as in the original cleanup.
    try:
        driver.quit()
    except Exception:
        pass

Пример #18
0
class Target:
    """Chrome session that repeatedly joins a poll with a PIN and submits an answer.

    ``layers`` maps the name of a handler method on this class (``"cloud"`` or
    ``"opend"``) to the answer that handler should submit.
    """

    def __init__(self, mode: str, pin: str):
        """Create the driver.

        Args:
            mode: pass ``"headless"`` to add the ``--headless`` Chrome flag.
            pin: text typed into the PIN input on the landing page.
        """
        self.options = Options()
        if mode == "headless":
            self.options.add_argument("--headless")
        self.layers = dict()  # {survey_kind: answer} pairs
        self.driver = Chrome(ChromeDriverManager().install(),
                             options=self.options)
        self.pin = pin
        self.ank = r"https://www.mentimeter.com/"

    def add(self, layer: Dict[str, str]) -> None:
        """Merge further ``{handler_name: answer}`` pairs into ``layers``."""
        self.layers.update(layer)

    def cloud(self, ans: str) -> None:
        """Type ``ans`` into every matching text input and submit each one.

        Any failure closes the driver window instead of raising.
        """
        xinp = r"/html/body/div[1]/div/div[2]/div[1]/form/fieldset/div/div/input"
        try:
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.XPATH, xinp)))
            ent1 = self.driver.find_elements_by_xpath(xinp)
            for ent in ent1:
                ent.send_keys(ans)
                ent.submit()
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH,
                     r"/html/body/div[1]/div/div[2]/div[1]/div[2]/h1")))
        except Exception:
            self.driver.close()

    def opend(self, ans: str) -> None:
        """Type ``ans`` into the textarea and submit it.

        Any failure closes the driver window instead of raising.
        """
        xinp = r"/html/body/div[1]/div/div[2]/div[1]/form/fieldset/div/textarea"
        try:
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.XPATH, xinp)))
            ent = self.driver.find_element_by_xpath(xinp)
            ent.send_keys(ans)
            ent.submit()
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH,
                     r"/html/body/div[1]/div/div[2]/div[1]/div[2]/h1")))
        except Exception:
            self.driver.close()

    def run(self, iterations: int) -> None:
        """Submit the first layer's answer ``iterations`` times, then drop that layer.

        Does nothing when no layers are queued. Errors raised while submitting
        end the loop early and are not propagated; the layer is removed either
        way.
        """
        if not self.layers:
            return

        # Resolve the layer up front so the final pop() always has a key, even
        # when the loop body never runs or fails on its first statement.
        k0 = next(iter(self.layers))
        answer = self.layers[k0]
        pin_xpath = r"/html/body/div[1]/div[1]/header/div/div/form/input"
        try:
            for _ in range(iterations):
                self.driver.get(self.ank)
                WebDriverWait(self.driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, pin_xpath)))
                ent = self.driver.find_element_by_xpath(pin_xpath)
                ent.send_keys(self.pin)
                ent.submit()
                # Plain attribute dispatch instead of eval(): the answer is
                # passed through untouched and can never be executed as code.
                getattr(self, k0)(answer)

                self.driver.delete_all_cookies()
        except Exception:
            # Best effort, as before: give up on the remaining iterations.
            pass
        self.layers.pop(k0)