def _get_order_info(self, order_info_element: "WebElement") -> Tuple[str, float, datetime.date]:
    """Extract order metadata from an order-info element.

    :param order_info_element: element containing the order's 'value' fields
    :returns: the OrderID, price and date
    """
    order_info_list: List[str] = [
        info_field.text
        for info_field in order_info_element.find_elements_by_class_name('value')
    ]
    # value tags have only generic class names so a constant order in form of:
    # [date, price, recipient_address, order_id] or if no recipient_address is available
    # [date, recipient_address, order_id]
    # is assumed
    if len(order_info_list) < 4:
        order_id = order_info_list[2]
    else:
        order_id = order_info_list[3]
    # price is usually formatted as 'EUR x,xx' but special cases as
    # 'Audible Guthaben' are possible as well
    # NOTE(review): per the layout comment above, in the 3-field form index 1
    # is the recipient address, which never contains 'EUR', so the price
    # silently falls back to 0.0 there — confirm that is intended.
    order_price_str = order_info_list[1]
    if 'EUR' in order_price_str:
        order_price = self._price_str_to_float(order_price_str)
    else:
        # Use a float literal so the declared return type holds on every path
        # (previously this branch returned the int 0).
        order_price = 0.0
    date_str = order_info_list[0]
    date = ut.str_to_date(date_str)
    return order_id, order_price, date
def iterate_react_select_options(self, select: "WebElement") -> t.List["WebElement"]:
    """Given a .Select rendered by react-select, return an iterable of
    .Select-option that can be .click()-ed to select them.
    """
    # related: https://github.com/JedWatson/react-select/issues/603#issuecomment-157888562  # noqa
    arrow = select.find_element_by_class_name('Select-arrow')
    arrow.click()
    # The option elements are only rendered once the dropdown has opened.
    self.wait(EC.element_to_be_clickable((By.CLASS_NAME, 'Select-option')))
    return select.find_elements_by_class_name('Select-option')
def is_trash_story(cls, story_container: "WebElement") -> bool:
    """False for adv and other not normal containers"""
    try:
        placeholders = story_container.find_elements_by_class_name(
            "story__placeholder")
    except StaleElementReferenceException:
        # The container went stale mid-check: treat it as trash as well.
        return True
    return bool(placeholders)
def get_link_from_item(friend_item: "WebElement") -> str:
    """Return a canonical profile URL for one friend-list item."""
    anchor = friend_item.find_elements_by_class_name("fsl")[0] \
        .find_element_by_tag_name("a")
    user_link = anchor.get_attribute("href")
    if "profile.php" in user_link:
        # Numeric-id profiles keep the id= query parameter.
        user_id = re.findall(r'(?<=id=)\d+', user_link)[0]
        return f"https://www.facebook.com/profile.php?id={user_id}"
    # Vanity-name profiles: drop any query string.
    return user_link.split('?')[0]
def _get_comment_issue_diff(self, mr_comment_shadow: WebElement) -> Optional[str]: issue_diff_elem_list = mr_comment_shadow.find_elements_by_class_name('issue-diff') if len(issue_diff_elem_list) == 0: #there's no issue diff issue_diff = None elif len(issue_diff_elem_list) == 1: #there's an issue diff issue_diff_elem = issue_diff_elem_list[0] issue_diff = issue_diff_elem.text else: raise ScrapeException('More than one issue-diff found in a comment.') return issue_diff
def parse_results_for_bulk_expired_domains_search(self, element: "WebElement") -> "Optional[List[DomainType]]":
    """Parse a bulk domain-check result element into DomainType entries.

    :param element: container with one product row per checked domain;
        may be None when the search produced no result element.
    :returns: a list of DomainType(domain, available), or None when
        ``element`` is None.
    """
    # Guard clause; the previous ``-> []`` annotation (a live empty-list
    # object) is replaced with an accurate forward-reference annotation.
    if element is None:
        return None
    products = element.find_elements_by_class_name(
        BulkCheckElements.get_element_by_class(
            BulkCheckElements.class_product).target)
    filtered = []
    for product in products:
        domain = product.find_element_by_class_name(
            BulkCheckElements.get_element_by_class(
                BulkCheckElements.class_domain_name).target).text
        # The availability message text is mapped to a boolean by the helper.
        available = GoDaddy.checkAvailability(product.find_element_by_class_name(
            BulkCheckElements.get_element_by_class(
                BulkCheckElements.class_msg).target).text)
        filtered.append(DomainType(domain, available))
    return filtered
def __init__(self, element: "WebElement") -> None:
    """Build a Ward accordion section and wrap its neighbourhood tiles.

    Raises when the section title does not start with "Ward".
    """
    super().__init__(element)
    if not self.title.startswith("Ward"):
        raise Exception("Accordion section is not a Ward")
    tile_elements = element.find_elements_by_class_name("tiles__tile")
    self.__neighbourhood_elements = tile_elements
    # Loop variable renamed so it no longer shadows the `element` parameter.
    self.neighbourhoods = [
        NeighbourhoodElement(tile) for tile in tile_elements
    ]
def parse_image_links(story_element: "WebElement") -> str:
    """Collect image URLs from the story's image blocks.

    :param story_element: story container scanned for image blocks.
    :returns: a JSON-encoded list of image source URLs. (The previous
        ``-> List`` annotation was wrong: ``json.dumps`` returns ``str``.)
    """
    links = []
    image_blocks = story_element.find_elements_by_class_name("story-block_type_image")
    for image_block in image_blocks:
        img = image_block.find_element_by_tag_name('img')
        link = img.get_attribute('src')
        if link is None:
            # Lazily-loaded images carry the URL in 'data-src' instead.
            link = img.get_attribute('data-src')
        # TODO: check in future versions [3.11.19]
        # aiogram can't send .webp as image
        # if link.endswith('.webp'):
        #     continue
        links.append(link)
    return json.dumps(links)
def get_elements_static(target_element: "WebElement", verifier: "SelecElement") -> "Optional[List[WebElement]]":
    """Find child elements of ``target_element`` selected by ``verifier``.

    :param verifier: selector description; ``element_type`` picks the lookup
        strategy and ``target`` is the locator value.
    :returns: the matched elements, or None when the lookup raised.
        (The previous ``-> []`` annotation — a live empty-list object — is
        replaced with an accurate forward-reference annotation.)
    """
    try:
        if verifier.element_type == NavEleType.IsId:
            return target_element.find_elements_by_id(verifier.target)
        if verifier.element_type == NavEleType.IsClass:
            return target_element.find_elements_by_class_name(verifier.target)
        if verifier.element_type == NavEleType.IsCssSelector:
            return target_element.find_elements_by_css_selector(verifier.target)
        if verifier.element_type == NavEleType.IsName:
            return target_element.find_elements_by_name(verifier.target)
        raise ValueError("Selector not Supported")
    except Exception as inst:
        # Deliberate best-effort: report the failure and signal it with None.
        print(type(inst))
        print(inst.args)
        return None
def parse_results_for_bulk_expired_domains_search(
        self, element: "WebElement") -> "Optional[List[DomainType]]":
    """Parse a bulk domain-check result element into DomainType entries.

    :param element: container with one product row per checked domain;
        may be None when the search produced no result element.
    :returns: a list of DomainType(domain, available), or None when
        ``element`` is None.
    """
    # Guard clause; the previous ``-> []`` annotation (a live empty-list
    # object) is replaced with an accurate forward-reference annotation.
    if element is None:
        return None
    products = element.find_elements_by_class_name(
        BulkCheckElements.get_element_by_class(
            BulkCheckElements.class_product).target)
    filtered = []
    for product in products:
        domain = product.find_element_by_class_name(
            BulkCheckElements.get_element_by_class(
                BulkCheckElements.class_domain_name).target).text
        # The availability message text is mapped to a boolean by the helper.
        available = GoDaddy.checkAvailability(
            product.find_element_by_class_name(
                BulkCheckElements.get_element_by_class(
                    BulkCheckElements.class_msg).target).text)
        filtered.append(DomainType(domain, available))
    return filtered
def assert_start_tiles(we: WebElement) -> None:
    """Assert that the start page tiles are as expected.

    Args:
        we: WebElement containing the desired content to assert.
    """
    # Exactly two start tiles are expected in the container.
    assert_element_by_classes(we, 'section', ['tile', 'start-tile'], None, True, 2)
    tiles = we.find_elements_by_class_name('start-tile')
    # First tile: "What is Myaku?" header with key-word spans and an ordered list.
    assert_element_by_classes(tiles[0], 'h4', 'main-tile-header', 'What is Myaku?')
    assert_element_by_classes(tiles[0], 'span', 'key-word', True, True, 2)
    assert_element_by_classes(tiles[0], 'ol', 'myaku-ol')
    assert_element_by_tag(tiles[0], 'li', True, True, 3)
    # Second tile: "Getting Started" header with list items and links.
    assert_element_by_classes(tiles[1], 'h4', 'main-tile-header', 'Getting Started')
    # NOTE(review): this line checks 'myaku-ul' under `we`, not tiles[1],
    # unlike the surrounding tiles[1] checks — confirm that is intentional.
    assert_element_by_classes(we, 'ul', 'myaku-ul')
    assert_element_by_tag(tiles[1], 'li', True, True, 4)
    assert_element_by_tag(tiles[1], 'a', True, True, 4)
def get_elements_static(target_element: "WebElement",
                        verifier: "SelecElement") -> "Optional[List[WebElement]]":
    """Find child elements of ``target_element`` selected by ``verifier``.

    :param verifier: selector description; ``element_type`` picks the lookup
        strategy and ``target`` is the locator value.
    :returns: the matched elements, or None when the lookup raised.
        (The previous ``-> []`` annotation — a live empty-list object — is
        replaced with an accurate forward-reference annotation.)
    """
    try:
        if verifier.element_type == NavEleType.IsId:
            return target_element.find_elements_by_id(verifier.target)
        if verifier.element_type == NavEleType.IsClass:
            return target_element.find_elements_by_class_name(
                verifier.target)
        if verifier.element_type == NavEleType.IsCssSelector:
            return target_element.find_elements_by_css_selector(
                verifier.target)
        if verifier.element_type == NavEleType.IsName:
            return target_element.find_elements_by_name(
                verifier.target)
        raise ValueError("Selector not Supported")
    except Exception as inst:
        # Deliberate best-effort: report the failure and signal it with None.
        print(type(inst))
        print(inst.args)
        return None
def get_job_info(self, job: "WebElement", category: str):
    """Extract Job Info from a given element and return it in a list

    Args:
        job(WebElement): A webelement reference to a tag consisting of job info.
        category(str): The category the job fits into.

    Returns:
        info(list): A list containing the following info: company_name,
        title, job_type, region and category.
    """
    def text_of(element):
        # All fields are read via textContent rather than .text.
        return element.get_attribute('textContent')

    company_cells = job.find_elements_by_class_name('company')
    company_name = text_of(company_cells[0])
    job_type = text_of(company_cells[1])
    title = text_of(job.find_element_by_class_name('title'))
    try:
        region = text_of(job.find_element_by_css_selector(
            'section.jobs article ul li span.region.company'))
    except NoSuchElementException:
        # Some listings have no region element; record an empty region.
        region = ''
    return [company_name, title, job_type, region, category]
def parse_text(story_element: "WebElement") -> str:
    """Join the text of every text block in the story with newlines."""
    blocks = story_element.find_elements_by_class_name("story-block_type_text")
    return "\n".join(block.text for block in blocks)
def get_name_from_item(friend_item: "WebElement") -> str:
    """Return the display name text from a friend-list item."""
    name_cell = friend_item.find_elements_by_class_name("fsl")[0]
    return name_cell.text
def parse_tags(story_element: "WebElement"):
    """Collect the story's tag texts into a set (deduplicated)."""
    found = set()
    for tag_element in story_element.find_elements_by_class_name("tags__tag"):
        found.add(tag_element.text)
    return found
def parse_results(self, search_results: WebElement, driver, get_b64_images=False):
    """Parse holiday-cottage search results into OfferRow records.

    :param search_results: element containing '.holiday-cottage-item' entries.
    :param driver: webdriver; used for the current URL and (optionally) to
        run JS that captures each listing image as a base64 data URL.
    :param get_b64_images: when True, also download each image's bytes via a
        canvas-generated data URL and store them on the offer.
    :returns: list of populated OfferRow objects.
    """
    offers = []
    for cottage in search_results.find_elements_by_class_name(
            'holiday-cottage-item'):
        offer = OfferRow()
        # Coordinates are embedded as "lat,lon" text in a hidden element.
        offer.lat, offer.lon = cottage.find_element_by_class_name(
            'mapLatLong').get_attribute('textContent').strip().split(',')
        title_info = cottage.find_element_by_css_selector('h3>a')
        offer.title = title_info.text
        offer.url = title_info.get_attribute('href')
        # URL shape assumed: .../<slug>-ref<ref> — TODO confirm against site.
        offer.ref = offer.url.split('-ref')[-1]
        offer.slug = offer.url.split('/')[-1].split('-ref')[0]
        offer.location = cottage.find_element_by_css_selector(
            '.loc-container>a').text
        offer.description = cottage.find_element_by_css_selector(
            '.cottage-img .para').text
        # Price text carries a '£' and possibly an ' all year' suffix.
        regular_price = cottage.find_element_by_css_selector(
            '.price-from-sec>.price').text.replace('£', '').replace(
                ' all year', '')
        try:
            offer.weekly_low = int(regular_price.split(' to ')[0])
        except ValueError:
            # Failed because price is text
            offer.weekly_low = 0
        try:
            offer.weekly_high = int(regular_price.split(' to ')[-1])
        except ValueError:
            # Failed because price is text
            offer.weekly_high = 0
        # First meta row is '|'-separated: sleeps | bedrooms | dog | child | wifi.
        meta = cottage.find_element_by_class_name(
            'products-meta').text.lower().strip().split('\n')[0].split('|')
        offer.sleeps = meta[0].strip()
        offer.bedrooms = meta[1].strip()
        offer.dog = meta[2].strip() != 'no'
        offer.child = meta[3].strip() != 'no'
        offer.wifi = meta[4].strip() != 'no'
        # Late offer
        offer_details = cottage.find_element_by_css_selector(
            '.lao-strip .offer-block')
        try:
            offer.late_savings_tag = offer_details.find_element_by_class_name(
                'tag').text
        except NoSuchElementException:
            offer.late_savings_tag = ""
        offer.late_offer = offer_details.text.replace(
            offer.late_savings_tag, '')
        offer.late_price = int(offer.late_offer.split(', £')[-1])
        offer.late_nights = int(
            offer.late_offer.split(' for ')[-1].split(' night')[0])
        # Get image url
        img = cottage.find_element_by_css_selector(f'#img-{offer.ref} img')
        base_url = urlparse(driver.current_url).netloc
        img_url = img.get_attribute('data-src')
        offer.img_url = "https://" + base_url + img_url
        if get_b64_images:
            # Image
            js = """
            let canvas = document.createElement('canvas');
            let img = document.querySelector('#img-{ref} img');
            img.scrollIntoView();
            canvas.id = 'canvas-{ref}';
            // wait for image to load before adding it to canvas
            img.onload = function() {{
                // using canvas to generate a b64 dataurl without having to request the image a second time to download
                canvas.width = img.width;
                canvas.height = img.height;
                document.body.appendChild(canvas);
                let ctx = canvas.getContext('2d');
                ctx.drawImage(img, 0, 0);
                let data = canvas.toDataURL('image/jpeg', 1.0);
                let output = document.createElement('div');
                output.id = 'output-{ref}';
                output.setAttribute('dataurl', data);
                img.parentNode.insertBefore(output, output.nextSibling);
            }};
            // if image has already loaded before attaching the listener, manually fire it
            if (img.complete && img.naturalHeight !== 0) {{
                let evt = document.createEvent('Event');
                evt.initEvent('load', false, false);
                img.dispatchEvent(evt);
            }}
            """.format(ref=offer.ref)
            driver.execute_script(js)
            # Download image as base64
            try:
                data_elem = WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located(
                        (By.ID, f'output-{offer.ref}')))
                dataurl = data_elem.get_attribute('dataurl')
                if dataurl == empty_b64_jpg:
                    # image wasn't loaded properly
                    dataurl = None
            except TimeoutException:
                dataurl = None
            if dataurl:
                response = urllib.request.urlopen(dataurl)
                offer.image = response.read()
        offers.append(offer)
    return offers
def _is_deleted_comment(self, div_under_comment_header: WebElement) -> bool: deleted_comment_notices = div_under_comment_header.find_elements_by_class_name('deleted-comment-notice') return len(deleted_comment_notices) > 0
def _get_comment_author_roles(self, div_under_comment_header: WebElement) -> List[str]: role_label_elems: List[WebElement] = div_under_comment_header.find_elements_by_class_name('role-label') role_labels = list(map(lambda elem: elem.text, role_label_elems)) return role_labels