Exemplo n.º 1
0
    def read_single_goods_commodity(self, element: WebElement) \
            -> (Commodity, None):
        """Build a Commodity from one entry of the goods list.

        :param element:
            An item of the list obtained from get_goods_list().
        :return:
            A Commodity whose keyword has not been set, or None when the
            item URL or title cannot be found. Missing store information is
            only logged; the Commodity is still returned without it.
        """
        try:
            comm = Commodity()
            # Reading the item URL may fail.
            comm.item_url = self.get_item_url(element)
            # XXX: the keyword is not assigned here.

            comm.item_title = self.get_item_name(element)
            # The item name is taken to be the same as the title.
            comm.item_name = comm.item_title
            # The item category is not available in the list view.
        except NoSuchElementException:
            # format_exc() returns the traceback as text; print_exc() returns
            # None, which made the message always end in 'None'.
            logging.warning('[Get single goods, No such element]:' +
                            (element.get_attribute("outerHTML") or "None ") +
                            (traceback.format_exc() or 'None'))
            return None
        try:
            comm.store_name = self.get_store_name(element)
            comm.store_url = self.get_store_url(element)
        except NoSuchElementException:
            logging.warning("Get single goods, Can't get store info:" +
                            (element.get_attribute("outerHTML") or "None ") +
                            (traceback.format_exc() or 'None'))
        return comm
Exemplo n.º 2
0
def download_lesson(driver: WebDriver, lesson: WebElement,
                    module_path: pathlib.Path) -> None:
    """Open a lesson in a new window and download its YouTube video.

    The lesson link is opened in a freshly created window. If
    get_youtube_url() finds a video URL there, the video is downloaded into
    ``module_path``. The new window is always closed and focus returns to
    the course page, even when something fails in between.

    :param driver: browser session currently showing the course page.
    :param lesson: link element whose ``href`` points at the lesson page.
    :param module_path: directory that receives the downloaded file.
    """
    lesson_url = lesson.get_attribute('href')

    course_page = driver.current_window_handle

    driver.execute_script('window.open()')
    driver.switch_to.window(driver.window_handles[1])
    try:
        driver.get(lesson_url)

        youtube_url = get_youtube_url(driver)
        if youtube_url is not None:
            # youtube-dl reads the output file name template from the
            # 'outtmpl' option; an 'output' key is silently ignored.
            ydl_opts = {
                'outtmpl': str(module_path / '%(title)s.%(ext)s'),
                'retries': 10,
            }

            try:
                with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                    ydl.download([youtube_url])
            except youtube_dl.utils.DownloadError as e:
                # A failed download is reported but must not stop the caller.
                print(e)
    finally:
        # Never leave the extra window open or the driver focused on it.
        driver.close()
        driver.switch_to.window(course_page)
Exemplo n.º 3
0
    def __init__(self, element: WebElement) -> None:
        """Wrap an accordion section element, expanding it when collapsed.

        Stores the section element, its header link and its content
        container, and exposes the header text as ``title``.

        :param element: element whose ``class`` contains
            ``accordion__section``.
        :raises Exception: if the element is not an accordion section.
        """
        # get_attribute() returns None when the attribute is absent; treat
        # that as "no classes" so the intended error is raised, not TypeError.
        if "accordion__section" not in (element.get_attribute("class") or ""):
            raise Exception("Element is not an accordion section")

        self._element = element

        self._header_element: WebElement = self._element.find_element_by_xpath(
            "a[contains(concat(' ', normalize-space(@class), ' '), ' accordion__section__title ')]"
        )

        # Expand the accordion if it is not already expanded.
        if "active" not in (self._element.get_attribute("class") or ""):
            self._header_element.click()
            # The accordion needs a bit of time to expand.
            sleep(0.5)

        title_element: WebElement = self._header_element.find_element_by_tag_name(
            "span")

        self._content_element: WebElement = self._element.find_element_by_xpath(
            "div[contains("
            "concat(' ', normalize-space(@class), ' '),' accordion__section__content '"
            ")]")

        self.title: str = title_element.text
Exemplo n.º 4
0
def enterElem(e:WebElement, func):
    """Follow the link of ``e`` and then call ``func`` with the old URL.

    The page the driver is on before navigating and the link target are both
    printed through uprint. After the driver has loaded the target, ``func``
    receives the URL of the page that was left.
    """
    origin = driver.current_url
    uprint('prev_url='+origin)
    destination = e.get_attribute('href')
    uprint('next_url='+destination)
    driver.get(destination)
    func(origin)
Exemplo n.º 5
0
 def get_text(self,
              locator="",
              locator_type="id",
              element: WebElement = None,
              info=""):
     """Return the stripped text of an element, or None on failure.

     Either pass ``element`` directly or pass ``locator`` together with
     ``locator_type``. A non-empty ``locator`` takes precedence and is
     resolved through get_element_(). When the element's ``text`` is empty,
     its ``innerText`` attribute is used instead.

     :param locator: locator string; empty means "use ``element``".
     :param locator_type: locator strategy passed to get_element_().
     :param element: element to read when no locator is given.
     :param info: label used in the log messages.
     :return: the stripped text, an empty string when there is no text, or
         None when reading the element failed.
     """
     try:
         if locator:  # a non-empty locator overrides the element argument
             self.log.debug("In locator condition")
             element = self.get_element_(locator, locator_type)
         self.log.debug("Before finding text")
         text = element.text
         self.log.debug("After finding element, size is: " + str(len(text)))
         if len(text) == 0:
             text = element.get_attribute("innerText")
         if len(text) != 0:
             self.log.info("Getting text on element :: " + info)
             self.log.info("The text is :: '" + text + "'")
             text = text.strip()
     except Exception:
         # Only ordinary errors are turned into a None result;
         # KeyboardInterrupt and SystemExit must propagate.
         self.log.error("Failed to get text on element " + info)
         print_stack()
         text = None
     return text
Exemplo n.º 6
0
    def analyze_element(self, el: WebElement):
        """Extract term, price and product name from a plan element.

        Elements carrying the ``featured-plan`` class are delegated to the
        featured-plan analyzer. For any other element the values are read
        from the markup and the linked PDF is passed to the PDF downloader.

        :return: dict with the keys ``term``, ``price`` and ``product_name``.
        """
        css_classes = el.get_attribute("class").split(" ")
        if "featured-plan" in css_classes:
            term, price, product_name = self.__featured_analyze_element(el)
        else:
            # The first standalone number of the first list item is the term;
            # the fallback is 1 when no number is present.
            term_text = el.find_element_by_xpath(".//ul/li[1]").text
            term_match = re.search(r"\b\d+\b", term_text)
            term = term_match.group() if term_match else 1

            price_text = el.find_element_by_css_selector("div.product2 h2").text
            price = re.search(r"(\d+(\.\d+)?)", price_text).group()

            product_name = el.find_element_by_css_selector("div.product2 h4").text

            # The first link inside the element points at the PDF.
            efl_url = el.find_element_by_xpath(".//a[1]").get_attribute("href")
            self.__download_pdf(efl_url)

        result = {"term": term}
        result["price"] = price
        result["product_name"] = product_name
        return result
 def getElemAttrib(e:WebElement,attName:str, trialCount=20,pollInterval=3,default=None) -> str:
     """Read an attribute, retrying while the element is not usable.

     Up to ``trialCount`` attempts are made. After an attempt that raises a
     stale-element, no-such-element or timeout error the function sleeps
     ``pollInterval`` seconds. ``default`` is returned when every attempt
     failed.
     """
     for _attempt in range(trialCount):
         try:
             value = e.get_attribute(attName)
         except (StaleElementReferenceException, NoSuchElementException, TimeoutException):
             sleep(pollInterval)
         else:
             return value
     return default
Exemplo n.º 8
0
 def _get_status_from_class_string(status_element: WebElement) -> JobStatus:
     """Return the first JobStatus whose value occurs in the element's class.

     :raises KeyError: if no JobStatus value is contained in the ``class``
         attribute string.
     """
     css_classes = status_element.get_attribute('class')
     matched = next(
         (candidate for candidate in JobStatus
          if candidate.value in css_classes),
         None)
     if matched is None:
         raise KeyError('Job status not found in class string: %s' %
                        str(css_classes))
     return matched
Exemplo n.º 9
0
def findVidSrc(vidEl: WebElement):
    """Derive the video URL and file name from a video element's poster.

    The id is the part of the ``data-postersrc`` attribute between the last
    ``/`` and the following ``.``.

    :return: ``[url, file_name]``; ``['', '']`` when the attribute is missing
        or does not contain an id in the expected form.
    """
    postersrc = vidEl.get_attribute('data-postersrc')
    if not postersrc:
        return ['', '']
    vid_ids = re.findall(r'^.*\/(.*?)\..*?$', postersrc)
    if not vid_ids:
        # A poster source without "/<id>.<ext>" gave an IndexError before.
        return ['', '']
    vid_id = vid_ids[0]
    return [
        f'https://i.kinja-img.com/gawker-media/image/upload/{vid_id}.mp4',
        f'{vid_id}.mp4'
    ]
Exemplo n.º 10
0
 def submit_form(self, form: WebElement) -> None:
     """Click the submit control that belongs to ``form``.

     The form is addressed by a CSS selector built from its class names.
     When no form is given, or the form has no class names, the generic
     ``form`` selector is used. The footer submit button is always accepted
     as an alternative match.

     :param form: the form element, or None to match any form.
     """
     # split() without an argument ignores repeated, leading and trailing
     # whitespace, so no empty class name ends up in the selector. A missing
     # class attribute (None) is treated as "no classes".
     class_names = (form.get_attribute("class") or "").split() if form else []
     form_selector = "." + ".".join(class_names) if class_names else 'form'
     submit_button_selector = (
         f'{form_selector} input[type="submit"], '
         '.form-footer__content__main-controls button[id="submit-form"]')
     submit_button = self.wait_for_element(submit_button_selector)
     submit_button.click()
Exemplo n.º 11
0
def get_value_of_elem(elem: WebElement):
    """Translate an element's class string into a cell value.

    :return: ``'o'`` when the class contains ``blank``, ``'x'`` when it
        contains ``bombflagged``, otherwise the last character of the class
        string converted to int.
    :raises Exception: when the class contains ``bombdeath``.
    """
    css = elem.get_attribute('class')
    for marker, symbol in (('blank', 'o'), ('bombflagged', 'x')):
        if marker in css:
            return symbol
    if 'bombdeath' in css:
        raise Exception('It\'s a boy!')
    return int(css[-1])
Exemplo n.º 12
0
    def parse_link(story_element: WebElement) -> Tuple:
        """Return ``(href, text)`` of the title link inside a story element.

        As a side effect the element's outer HTML is written to
        ``./parser/story.html``, replacing any previous content.

        :param story_element: element containing a ``story__title-link``.
        :return: tuple of the link's ``href`` attribute and its text.
        """
        # The encoding is given explicitly so that markup with non-ASCII
        # characters can be written regardless of the platform default.
        with open('./parser/story.html', 'w', encoding='utf-8') as file:
            file.write(story_element.get_attribute('outerHTML'))

        link = story_element.find_element_by_class_name("story__title-link")
        href = link.get_attribute('href')

        return href, link.text
Exemplo n.º 13
0
def handle_script(script: WebElement) -> str:
    """Return the download URL of the highest-quality version.

    The script's inner HTML is searched with ``config_regex``; the first
    captured group is parsed as JSON and the progressive file with the
    greatest ``height`` is selected.
    """
    script_text = script.get_attribute("innerHTML")
    mobj = config_regex.search(script_text)
    unparsed_json = mobj.groups()[0]
    parsed_json = json.loads(unparsed_json)
    files = parsed_json["request"]["files"]["progressive"]
    # Sorted tallest first, so index 0 is the highest quality. The previous
    # index 1 returned the second best and failed when only one file existed.
    by_height = sorted(files, key=lambda f: f["height"], reverse=True)
    return by_height[0]["url"]
Exemplo n.º 14
0
    def __init__(self, element: WebElement) -> None:
        """Wrap a tile element and read its title, summary and link.

        :param element: element whose ``class`` contains ``tiles__tile``.
        :raises Exception: if the element is not a tile.
        """
        # get_attribute() returns None when the attribute is absent; treat
        # that as "no classes" so the intended error is raised, not TypeError.
        if "tiles__tile" not in (element.get_attribute("class") or ""):
            raise Exception("Element is not a tile")

        # The link is a direct child; heading and paragraph live inside it.
        self._link_element: WebElement = element.find_element_by_xpath("a")
        self._title_element: WebElement = self._link_element.find_element_by_tag_name("h2")
        self._summary_element: WebElement = self._link_element.find_element_by_tag_name("p")

        self.title: str = self._title_element.text
        self.summary: str = self._summary_element.text
        self.url: str = self._link_element.get_attribute("href")
Exemplo n.º 15
0
    def __web_element_to_dict(self, element: WebElement) -> dict:
        """Convert an item element into a plain dict.

        ``name`` and ``game`` come from the element's data attributes. The
        remaining entries are read from child nodes located by XPath.

        :return: dict with the keys ``name``, ``game``, ``floatvalue``,
            ``paint``, ``price``, ``wear``, ``stattrack`` and ``locked``.
        """
        def text_at(expression):
            # Text of the child matched by the XPath expression.
            return self.__get_text(element, (By.XPATH, expression))

        item = {
            'name': str(element.get_attribute('data-original-title')).strip(),
            'game': str(element.get_attribute('data-appid')).strip(),
        }
        item['floatvalue'] = self.__value_of(
            text_at('.//span[contains(text(), "Float")]//parent::div'))
        item['paint'] = self.__value_of(
            text_at('.//span[contains(text(), "Paint")]//parent::div'))
        item['price'] = self.__value_of(
            text_at('.//span[@class="pricetext"]'))
        item['wear'] = text_at('.//div[@class="quality"]')
        # Only the presence of text matters for this flag.
        item['stattrack'] = bool(text_at('.//div[@class="stattrak"]'))
        item['locked'] = text_at('.//div[@class="bot-icon"]')
        return item
Exemplo n.º 16
0
 def get_attribute_from_element(self,
                                attribute_name: str,
                                locator_type: By = None,
                                locator_value: str = None,
                                element: WebElement = None,
                                from_element: WebElement = None,
                                time_to_wait: int = TIME_TO_WAIT) -> str:
     """Return an attribute of an element.

     When ``element`` is not supplied it is looked up through
     find_element_by() using the locator, the optional parent element and
     the wait time.
     """
     target = element or self.find_element_by(
         locator_type, locator_value, from_element, time_to_wait)
     return target.get_attribute(attribute_name)
Exemplo n.º 17
0
def findImgSrc(imgEl: WebElement):
    """Derive the image URL and file name from an image element.

    ``srcset`` is preferred, then ``data-srcset``; from either one the file
    name of the ``80w`` candidate is used. Without any srcset the last path
    segment of ``src`` is used.

    :return: ``[url, file_name]``; ``['', '']`` when no file name can be
        determined.
    """
    nothing = ['', '']
    prefix = 'https://i.kinja-img.com/gawker-media/image/upload/'

    srcset = imgEl.get_attribute('srcset')
    if not srcset:
        srcset = imgEl.get_attribute('data-srcset')
    if not srcset:
        src = imgEl.get_attribute('src')
        if not src:
            return nothing
        src_ids = re.findall(r'^.*\/(.*?)$', src)
        if len(src_ids) == 0 or src_ids[0].startswith('http'):
            return nothing
        return [f'{prefix}{src_ids[0]}', src_ids[0]]
    img_ids = re.findall(r'^.*\/(.*?)\s80w', srcset)
    if not img_ids:
        # A srcset without an "80w" candidate gave an IndexError before.
        return nothing
    return [f'{prefix}{img_ids[0]}', img_ids[0]]
Exemplo n.º 18
0
def getElemAttr(elem:WebElement, attr:str, timeOut:float=30, pollFreq:float=1) -> str:
    """Read an attribute, retrying while the element reference is stale.

    After each stale-element error the function sleeps ``pollFreq`` seconds.
    The time spent in failed attempts is summed up, and once it reaches
    ``timeOut`` a TimeoutException is raised.
    """
    spent = 0.0
    while spent < timeOut:
        started = time.time()
        try:
            value = elem.get_attribute(attr)
        except StaleElementReferenceException:
            time.sleep(pollFreq)
            spent += time.time() - started
        else:
            return value
    raise TimeoutException('[getElemAttr] Time out elem='+str(elem))
Exemplo n.º 19
0
 def getElemAttrib(e: WebElement,
                   attName: str,
                   trialCount=20,
                   pollInterval=3,
                   default=None) -> str:
     """Read an attribute, retrying while the element is not usable.

     Up to ``trialCount`` attempts are made. After an attempt that raises a
     stale-element, no-such-element or timeout error the function sleeps
     ``pollInterval`` seconds. ``default`` is returned when every attempt
     failed.
     """
     retryable = (StaleElementReferenceException, NoSuchElementException,
                  TimeoutException) if trialCount > 0 else ()
     for _attempt in range(trialCount):
         try:
             value = e.get_attribute(attName)
         except retryable:
             sleep(pollInterval)
         else:
             return value
     return default
Exemplo n.º 20
0
def clickDownloadableElem(elem:WebElement)->str:
    """Record the link target of ``elem`` for its file name.

    Reads the element's text through getElemText() and its ``href``
    attribute, then issues an UPDATE on the ``dlink`` table through sql()
    that sets ``href`` where ``file_name`` equals that text.

    NOTE(review): despite the name and the ``-> str`` annotation, the visible
    body neither clicks the element nor returns a value (it ends with a bare
    ``return``) -- confirm whether part of the implementation is missing.
    """
    # NOTE(review): pollFreq and filesOld are not read directly below; they
    # are only part of the locals() mapping handed to sql().
    pollFreq=1
    filesOld=os.listdir(dlDir)
    fileName=getElemText(elem)
    href = elem.get_attribute('href')
    assert href is not None
    uprint('fileName="%s" href="%s"'%(fileName,href))
    # locals() supplies the :href and :fileName parameters, so these local
    # variable names must stay as they are.
    sql("UPDATE dlink SET href=:href WHERE"
        " file_name=:fileName",locals())
    # Print the statement with the values filled in.
    uprint("UPDATE dlink SET href=%(href)s WHERE"
        " file_name=%(fileName)s"%locals())
    return
Exemplo n.º 21
0
    def _get_team_id_from_link(link: WebElement) -> str:
        """Return the team id found after ``teamId=`` in the link's href.

        :raises RuntimeError: if the extracted text is not an integer.
        """
        href_attribute = link.get_attribute("href")
        # Everything after the last "teamId=" (the whole href if absent).
        team_id = href_attribute.rpartition("teamId=")[2]
        try:
            int(team_id)
        except ValueError as err:
            logger.error(
                f"Team ID {team_id} does not seem correct (not an integer)")
            raise RuntimeError(
                f"Could not get team ID from `href` attribute: {href_attribute}"
            ) from err
        else:
            return team_id
Exemplo n.º 22
0
    def _get_team_id_from_class(element: WebElement) -> str:
        """Return the team id found after ``teamId-`` in the class attribute.

        Only the first whitespace-delimited token after the last ``teamId-``
        is used, so class names that follow the team id do not break the
        lookup (the same approach as for player ids).

        :raises RuntimeError: if the extracted text is not an integer.
        """
        class_attribute = element.get_attribute("class")
        remainder = class_attribute.split("teamId-")[-1]
        # Drop any class names that follow the id; keep the raw remainder
        # when it is blank so the integer check below reports it.
        team_id = (remainder.split() or [remainder])[0]
        try:
            _ = int(team_id)
        except ValueError as e:
            logger.error(
                f"Team ID {team_id} does not seem correct (not an integer)")
            raise RuntimeError(
                f"Could not get team ID from `class` attribute: {class_attribute}"
            ) from e

        return team_id
Exemplo n.º 23
0
    def from_web_element(self, web_element: WebElement):
        """Populate this session from a session element.

        The element text is split on single spaces: the first token is the
        status and the second the session id. The speaker text is what
        remains of the tokens between those two and the last three after the
        session name and the word ``about`` have been removed.
        """
        tokens = web_element.text.split(' ')

        self.status = tokens[0]
        self.session_id = tokens[1]
        self.session_name = web_element.find_element_by_class_name('title').text
        vote_url = web_element.get_attribute('data-vote-url')
        self.url = vote_url.replace('/vote', '')

        middle = " ".join(tokens[2:-3])
        without_title = middle.replace(self.session_name, '')
        self.speaker_text = without_title.replace('about', '').strip()
def element_to_image_info(element: Element,
                          with_thumbs: bool) -> Optional[ImageInfo]:
    """Build an ImageInfo from a search result element.

    The image URL is read from the ``serp-item`` / ``img_href`` entry of the
    JSON stored in the element's ``data-bem`` attribute.

    :param element: the result item element.
    :param with_thumbs: when true, also capture a PNG screenshot of the
        item's ``serp-item__thumb`` element.
    :return: the ImageInfo, or None when there is no image URL or a
        requested thumbnail could not be captured.
    """
    url = json.loads(element.get_attribute('data-bem')
                     or "{}").get('serp-item', {}).get('img_href', None)
    if not url:
        return None

    thumb = find(element, 'serp-item__thumb') if with_thumbs else None
    # The screenshot comes from the located thumbnail element; the previous
    # code read the not-yet-assigned ``thumb_png`` here.
    thumb_png = thumb.screenshot_as_png if thumb else None
    if with_thumbs and not thumb_png:
        return None

    return ImageInfo(url=url, thumb_png=thumb_png)
Exemplo n.º 25
0
def getElemAttr(elem: WebElement,
                attr: str,
                timeOut: float = 30,
                pollFreq: float = 1) -> str:
    """Read an attribute, retrying while the element reference is stale.

    After each stale-element error the function sleeps ``pollFreq`` seconds.
    The time spent in failed attempts is summed up, and once it reaches
    ``timeOut`` a TimeoutException is raised.
    """
    waited = 0.0
    while waited < timeOut:
        attempt_start = time.time()
        try:
            result = elem.get_attribute(attr)
        except StaleElementReferenceException:
            time.sleep(pollFreq)
            waited += (time.time() - attempt_start)
            continue
        return result
    raise TimeoutException('[getElemAttr] Time out elem=' + str(elem))
Exemplo n.º 26
0
    def _get_player_id_from_class(element: WebElement) -> str:
        """Return the player id found after ``playerNameId-`` in the class.

        The id is the text between the last ``playerNameId-`` and the next
        space of the element's ``class`` attribute.

        :raises RuntimeError: if the extracted text is not an integer.
        """
        class_attribute = element.get_attribute("class")
        after_marker = class_attribute.rpartition("playerNameId-")[2]
        player_id = after_marker.partition(" ")[0]
        try:
            int(player_id)
        except ValueError as err:
            logger.error(
                f"Player ID {player_id} does not seem correct (not an integer)"
            )
            raise RuntimeError(
                f"Could not get player ID from `class` attribute: {class_attribute}"
            ) from err
        else:
            return player_id
Exemplo n.º 27
0
    def robust_input(self,
                     ele: WebElement,
                     text: str,
                     use_paste=False,
                     ensure_value=False) -> bool:
        """Clear an input element and enter ``text`` into it.

        :param ele: the input element.
        :param text: the text to enter.
        :param use_paste: put the text on the clipboard with the ``clip``
            command and paste it with Ctrl+V instead of typing it.
        :param ensure_value: compare the element's ``value`` attribute with
            ``text`` afterwards.
        :return: True, unless ``ensure_value`` is set and the element's
            value differs from ``text`` (the mismatch is logged).
        """
        ele.clear()
        if use_paste:
            import subprocess
            # The text goes to clip over stdin instead of through a shell
            # command line: shell metacharacters in the text are harmless
            # and no trailing newline is appended. clip accepts UTF-16
            # input with a byte order mark.
            try:
                subprocess.run(["clip"], input=text.encode("utf-16"),
                               check=False)
            except OSError as err:
                # Stay best-effort when the clip command is unavailable.
                logger_driver.warning({"clipboard copy failed": str(err)})
            ele.send_keys(Keys.CONTROL, "v")
        else:
            ele.send_keys(text)

        if not ensure_value:
            return True
        if ele.get_attribute("value") == text:
            return True
        logger_driver.warning({
            "target input": text,
            "inputted": ele.get_attribute("value")
        })
        return False
Exemplo n.º 28
0
 def get_attrs(self,
               element: WebElement,
               method: str = 'bs4',
               verbose: int = 0):
     """Return the attributes of an element as a dict.

     WebDriver has no API for listing all attributes, see
     https://stackoverflow.com/questions/27307131/selenium-webdriver-how-do-i-find-all-of-an-elements-attributes

     Note: for html '<div class="login-greeting">Hi LCA Editor Tester,</div>'
     bs4 gives {'class': ['login-greeting']} while
     js  gives {'class':  'login-greeting' }.

     :param element: the element to inspect.
     :param method: 'bs4' parses the element's outerHTML with BeautifulSoup;
         'js' reads the attributes through a script run in the browser.
     :param verbose: when truthy, the outerHTML is logged (bs4 only).
     :return: dict mapping attribute names to values; empty when the
         element has no outerHTML.
     :raises RuntimeError: for an unsupported ``method``.
     """
     if method == 'bs4':
         html: str = element.get_attribute('outerHTML')
         if verbose:
             tplog(f"outerHTML={html}")
         if not html:
             return {}
         # Calling the soup returns every tag in document order, so the
         # first tag is the element itself. Only its attributes are
         # returned; merging in the descendants' attributes, as before,
         # disagreed with the 'js' method.
         # https://www.crummy.com/software/BeautifulSoup/bs4/doc/#attributes
         tags = BeautifulSoup(html, 'html.parser')()
         return dict(tags[0].attrs) if tags else {}
     elif method == 'js':
         # JavaScript; duplicate attributes will be overwritten.
         js_script = 'var items = {}; ' \
                     'for (index = 0; index < arguments[0].attributes.length; ++index) { ' \
                     '   items[arguments[0].attributes[index].name] = arguments[0].attributes[index].value' \
                     '};' \
                     'return items;'
         attrs = self.driver.execute_script(js_script, element)
         return attrs
     else:
         raise RuntimeError(
             f"unsupported method={method}. accepted: bs4 or js")
Exemplo n.º 29
0
    def analyze_element(self, el: WebElement):
        """Extract term, price and product name from a plan card.

        The term is the number in front of "Months" in the card's list,
        unless the card's ``data-product-promo`` value is blacklisted, in
        which case the term is "1". After the values are read, the link in
        the modal footer is clicked, the ``src`` of the iframe in the second
        window is loaded, that window is closed and the first window becomes
        active again.

        :return: dict with the keys ``term``, ``price`` and ``product_name``.
        :raises Exception: when no term can be found in the list text.
        """
        promo = el.get_attribute("data-product-promo")
        if promo in self.plans_promo_blacklist:
            term = "1"
        else:
            term = el.find_element_by_css_selector(
                "div.card-body div ul").text
            months = re.search(r"(\d+)\s+Months", term)
            if not months:
                raise Exception("Term could not match. (%s)" % term)
            term = months.group(1)

        price_text = el.find_element_by_css_selector(
            "div.card-body div.modal-body h1"
            ",div.card-body div.product2.cards_div2 h2").text
        # Only the part in front of the cent sign is searched for a number.
        before_cents = price_text.split("¢")[0]
        price = re.search(r"(\d+(\.\d+)?)", before_cents).group(1)

        product_name = el.find_element_by_css_selector(
            "div.card-body div.modal-body h2"
            ",div.card-body div.product2.cards_div2 h4").text

        el.find_element_by_css_selector("div.modal-footer a").click()

        browser = self.client
        browser.switch_to_window(browser.window_handles[1])

        pdf_frame = browser.find_element_by_tag_name("iframe")
        browser.get(pdf_frame.get_attribute("src"))

        browser.close()
        browser.switch_to_window(browser.window_handles[0])
        self.wait_for()

        return dict(term=term, price=price, product_name=product_name)
Exemplo n.º 30
0
    def send_keys(self,  # pylint: disable=no-self-use
                  element: WebElement,
                  value: str,
                  retries: int = 10) -> None:
        '''Send keys to an element until its value really matches.

        When running in non-headless mode, interactions with text elements
        are not always registered by the browser, which leaves input
        elements empty even after the keys were sent.

        The element is cleared and filled up to `retries` times. The method
        returns as soon as the element's value equals the expected string;
        if that never happens an error is logged.
        '''

        for _attempt in range(retries):
            element.clear()
            element.send_keys(value)
            if element.get_attribute('value') != value:
                continue
            break
        else:
            logging.error('Failed to send keys to the element')
Exemplo n.º 31
0
    def get_info_from_page(self, elm: WebElement, input_info, flag):

        """
        Extract a piece of information from a page element.
        :param elm: the page element to read from
        :param input_info: name of the attribute to read, e.g. ``value``;
            only used when flag is "ATTRIBUTE"
        :param flag: what to extract: "TEXT" for the element text,
            "ATTRIBUTE" for an attribute value
        :return: the extracted text or attribute value
        """
        # Element text.
        if flag == "TEXT":
            return elm.text
        # Element attribute value.
        if flag == "ATTRIBUTE":
            return elm.get_attribute(input_info)
        raise UnsupportedValueException('flag', flag, ['TEXT', 'ATTRIBUTE'])
Exemplo n.º 32
0
    def get_value_with_type(
            self, input_el: WebElement,
            **data: InputValue) -> t.Tuple[InputValue, InputType]:
        """Look up the value for an input element and decide its input type.

        The datum is taken from ``data`` under the input's name. A tuple
        datum is unpacked as ``(type, value)``. Otherwise the type is Date
        when the value passes is_valid_date(), Checkbox (with the value
        wrapped in a list) for radio and checkbox inputs, and Default for
        everything else.

        :return: ``(value, input_type)``.
        :raises MissingDataException: if ``data`` has no entry for the input.
        """
        input_name = self.get_input_name(input_el)
        try:
            datum = data[input_name]
        except KeyError as err:
            raise MissingDataException(f"Form is missing '{input_name}' "
                                       f"input in {data}") from err

        if isinstance(datum, tuple):
            el_type, value = datum  # type: ignore
            return value, el_type

        # Bind the value before it is inspected; the previous code read
        # ``value`` before assigning it for every non-tuple datum.
        value = datum
        # TODO: this regex should be more flexible / defined in settings
        if self.is_valid_date(value):
            el_type = InputTypes.Date
        elif input_el.get_attribute('type') in ['radio', 'checkbox']:
            el_type, value = InputTypes.Checkbox, [value]
        else:
            el_type = InputTypes.Default
        return value, el_type
Exemplo n.º 33
0
    def set_val(self,
                element: WebElement,
                val: Any,
                send_enter: bool = False,
                send_keys: bool = False,
                **kw) -> None:
        """Set input box value by send_keys or exec script

        The value is handed to the script as an argument together with the
        element itself, so the text needs no escaping and the element does
        not need an id.

        Parameters
        ----------
        element : WebElement
        val : Any
            converted with str() before it is entered
        send_enter : bool, optional
            press enter after sending, default False
        send_keys : bool, optional
            type the value instead of setting it by script, default False

        Notes
        -----
        If setting the value fails, a warning is logged. Unless
        ``self.suppress_errors`` is set, the value is then typed with
        send_keys as a fallback (errors from that fallback propagate).
        """
        driver = self.driver
        text = str(val)

        try:
            if send_keys:
                # Typed text must be the raw value; JavaScript escaping
                # would show up literally in the input.
                element.send_keys(text)
            else:
                driver.execute_script(
                    "arguments[0].value = arguments[1];", element, text)

            if send_enter:
                element.send_keys(
                    Keys.ENTER
                )  # date fields need an ENTER to set val properly

        except Exception:
            log.warning(f'couldn\'t set value: {text}')
            if self.suppress_errors:
                return
            element.send_keys(text)
Exemplo n.º 34
0
    def _parse_root(self, category: WebElement):
        """Open a root category and parse the columns it contains.

        The category's text is its name. Categories whose name equals an
        entry of ``crawl_config.exclude_category`` are skipped (returns
        None). Otherwise the matching ``home_<class>`` element is activated
        with ENTER, the root is recorded through ``_insert`` and the
        ``co_col`` children of the inner container are passed to
        ``_parse_co_col``.

        :param category: element of the root category; its ``class``
            attribute is used to build the ids of the elements to look up.
        """
        # Root name
        root_name: str = category.text
        # root_name = text.replace('/', '-')

        logging.info('rootName : ' + root_name)

        for exclude_category in self.crawl_config.exclude_category:
            if eq(root_name, exclude_category):
                return None

        class_att = category.get_attribute('class')
        click_xpath = '//*[@id="home_{0}"]'.format(class_att)

        self.driver.implicitly_wait(5)
        # Try clicking once first.
        self.driver.find_element_by_xpath(click_xpath).send_keys(Keys.ENTER)
        # Build the inner XPath from class_att.
        time.sleep(1)

        xpath_cate = '//*[@id="home_{0}_inner"]/div[1]'.format(class_att)

        # Root Category
        element: WebElement = None
        # NOTE(review): the loop only repeats while the lookup returns None;
        # if find_element_by_xpath raises when nothing is found, the
        # exception propagates and no retry happens -- confirm.
        while 1:
            if element is not None:
                break
            else:
                # If the click did not register properly, keep clicking.
                self.driver.find_element_by_xpath(click_xpath).send_keys(
                    Keys.ENTER)
                self.driver.implicitly_wait(4)
                time.sleep(1)
                element = self.driver.find_element_by_xpath(xpath_cate)

        self._insert(None, root_name, None, True)
        # Root -> sub
        co_col_elements = element.find_elements(By.CLASS_NAME, 'co_col')

        self._parse_co_col(co_col_elements, root_name)
Exemplo n.º 35
0
 def is_expanded(self, node: WebElement):
     """Return True when the node's class contains ``dynatree-expanded``.

     A node without a ``class`` attribute counts as not expanded.
     """
     # get_attribute() returns None when the attribute is absent.
     return 'dynatree-expanded' in (node.get_attribute('class') or '')
Exemplo n.º 36
0
def elem_has_class(elem: WebElement, className: str):
    """Return True when ``className`` is one of the element's CSS classes.

    The ``class`` attribute is split on whitespace and compared as whole
    names. An element without a ``class`` attribute has no classes.
    """
    # get_attribute() returns None when the attribute is absent.
    classes = (elem.get_attribute('class') or '').split()
    return className in classes
Exemplo n.º 37
0
 def get_value(element: WebElement) -> str:
     """Return the element's ``value`` attribute."""
     current_value = element.get_attribute('value')
     return current_value