def parse_hq_location(field: WebElement) -> Tuple[City, Country]:
    """Extract the headquarters city and country from a location field.

    The text of the first ``div`` inside *field* is split on ``'|'`` and, if
    that does not yield two segments, on ``'÷'``.  The first segment supplies
    the city (only the part before its first comma is kept, so
    ``"Cambridge, MA"`` becomes ``"Cambridge"``); the second segment is
    returned as the country.

    Args:
        field: Element containing the ``div`` with the location text.

    Returns:
        ``(city, country)``, or ``('', '')`` when the text cannot be parsed.
    """
    try:
        location = field.find_element_by_tag_name('div').text.strip()
    except Exception:
        # Best effort: an unreadable field is reported as unparseable below.
        location = ''

    for separator in ('|', '÷'):
        try:
            # _strip is assumed to strip every element of the list -- TODO confirm.
            parts = _strip(location.split(separator))
            city = parts[0].split(',')[0].strip()
            return city, parts[1]
        except Exception:
            # Usually an IndexError: the separator was not present.
            continue

    print('--------------- HQ Location could not be parsed')
    return '', ''
def parse_primary_contact(field: WebElement) -> Tuple[Email, PhoneNumber]:
    """Extract the e-mail address and phone number of the primary contact.

    The text of the first ``div`` inside *field* is read line by line.  A line
    containing ``'@'`` is taken as the e-mail address; a line containing
    ``'Tel'`` or ``'+'`` is taken as the phone number.  Other lines (name,
    job title, ...) are ignored.  If several lines match, the last one wins.

    Example inputs::

        Ms Jane Doe
        Information and Communication Manager
        jane.doe@example.org
        Tel + 33 (0)1 42 65 61 40

        jane.doe@example.org
        +612 9299 2215

    Args:
        field: Element containing the ``div`` with the contact text.

    Returns:
        ``(email, phone_number)``; each item is ``None`` when not found or
        when the field could not be read.
    """
    # Initialised up front so a failed lookup still returns a well-formed pair.
    email = None
    phone_number = None
    try:
        contact = field.find_element_by_tag_name('div').text.strip()
    except Exception:
        return email, phone_number

    for part in contact.split('\n'):
        if '@' in part:
            email = part.strip()
        elif 'Tel' in part or '+' in part:
            phone_number = part.strip()
    return email, phone_number
def parse_website_url(field: WebElement) -> Optional[str]:
    """Return the website text shown in *field*.

    Args:
        field: Element containing the ``div`` with the website text.

    Returns:
        The stripped text of the first ``div`` inside *field*, or ``None``
        when the element cannot be found or read.
    """
    try:
        return field.find_element_by_tag_name('div').text.strip()
    except Exception:
        # Best effort: a missing/unreadable div means "no value".  Catching
        # Exception (not a bare except) lets KeyboardInterrupt propagate.
        return None
def parse_yearly_income(field: WebElement) -> Optional[str]:
    """Return the yearly income text shown in *field*.

    The value is returned verbatim (e.g. ``"EUR XXX"`` or ``"$XXX"``); no
    currency or number parsing is performed.

    Args:
        field: Element containing the ``div`` with the income text.

    Returns:
        The stripped text of the first ``div`` inside *field*, or ``None``
        when the element cannot be found or read.
    """
    try:
        return field.find_element_by_tag_name('div').text.strip()
    except Exception:
        # Best effort: a missing/unreadable div means "no value".
        return None
def parse_surplus_deficit(field: WebElement) -> Optional[str]:
    """Return the surplus/deficit text shown in *field*.

    The value is returned verbatim (e.g. ``"EUR +xxx"``); it is not parsed
    into a number.

    Args:
        field: Element containing the ``div`` with the surplus/deficit text.

    Returns:
        The stripped text of the first ``div`` inside *field*, or ``None``
        when the element cannot be found or read.
    """
    try:
        return field.find_element_by_tag_name('div').text.strip()
    except Exception:
        # Best effort: a missing/unreadable div means "no value".
        return None
def parse_legal_status(field: WebElement) -> Optional[str]:
    """Return the legal-status text shown in *field*.

    Args:
        field: Element containing the ``div`` with the legal status.

    Returns:
        The stripped text of the first ``div`` inside *field*, or ``None``
        when the element cannot be found or read.
    """
    try:
        return field.find_element_by_tag_name('div').text.strip()
    except Exception:
        # Best effort: a missing/unreadable div means "no value".
        return None
def parse_mission(field: WebElement) -> Optional[str]:
    """Return the mission statement shown in *field*.

    Args:
        field: Element containing the ``div`` with the mission text.

    Returns:
        The stripped text of the first ``div`` inside *field*, or ``None``
        when the element cannot be found or read.
    """
    try:
        return field.find_element_by_tag_name('div').text.strip()
    except Exception:
        # Best effort: a missing/unreadable div means "no value".
        return None
def parse_year_founded(field: WebElement) -> Optional[int]:
    """Return the founding year shown in *field* as an integer.

    Args:
        field: Element containing the ``div`` with the year.

    Returns:
        The text of the first ``div`` inside *field* converted with ``int``,
        or ``None`` when the element cannot be read or the text is not a
        plain integer.
    """
    try:
        return int(field.find_element_by_tag_name('div').text.strip())
    except Exception:
        # Covers both a failed lookup and a non-numeric value.
        return None
def _parse_compact_render_elem(self, video_element: WebElement):
    """Populate title, URL and channel name from a compact video element.

    Sets ``self.title``, ``self.url`` and ``self.channel_name`` from the
    element's DOM and resets ``self.channel_url`` to ``None``.

    Args:
        video_element: Element containing the ``span#video-title``, an ``a``
            link and a ``ytd-channel-name`` node.
    """
    def inner_text(node):
        return node.get_attribute("innerText")

    title_node = video_element.find_element_by_css_selector("span#video-title")
    self.title = inner_text(title_node)

    link = video_element.find_element_by_tag_name("a")
    self.url = link.get_attribute("href")

    channel_node = video_element.find_element_by_xpath(
        ".//ytd-channel-name//yt-formatted-string"
    )
    self.channel_name = inner_text(channel_node)

    # Channel is not clickable from this element
    self.channel_url = None
def parse_total_members(field: WebElement) -> Optional[int]:
    """Return the total member count shown in *field* as an integer.

    Args:
        field: Element containing the ``div`` with the member count.

    Returns:
        The text of the first ``div`` inside *field* converted with ``int``,
        or ``None`` when the element cannot be read or the text is not a
        plain integer.
    """
    try:
        return int(field.find_element_by_tag_name('div').text.strip())
    except Exception:
        # Covers both a failed lookup and a non-numeric value.
        return None
def parse_accreditation(field: WebElement) -> bool:
    """Tell whether the accreditation field says "yes".

    Args:
        field: Element containing the ``div`` with the accreditation text.

    Returns:
        ``True`` when the stripped text of the first ``div`` inside *field*
        equals ``"yes"`` (case-insensitive); ``False`` otherwise, including
        when the element cannot be found or read.
    """
    try:
        answer = field.find_element_by_tag_name('div').text.strip()
    except Exception:
        # Best effort: an unreadable field counts as "not accredited".
        return False
    return answer.lower() == 'yes'
def parse_membership_based(field: WebElement) -> bool:
    """Tell whether the membership-based field says "yes".

    Args:
        field: Element containing the ``div`` with the membership text.

    Returns:
        ``True`` when the stripped text of the first ``div`` inside *field*
        equals ``"yes"`` (case-insensitive); ``False`` otherwise, including
        when the element cannot be found or read.
    """
    try:
        answer = field.find_element_by_tag_name('div').text.strip()
    except Exception:
        # Best effort: an unreadable field counts as "not membership based".
        return False
    return answer.lower() == 'yes'
def get_emoji(element: WebElement) -> Optional[str]:
    """Read the emoji markup from a nested emoji span.

    Looks up the first ``span`` inside *element*, then the first ``span``
    inside that one, and returns the inner span's ``innerHTML`` attribute.

    Returns:
        The ``innerHTML`` value, or ``None`` when a lookup raises
        ``NoSuchElementException``.
    """
    try:
        outer_span = element.find_element_by_tag_name('span')
        inner_span = outer_span.find_element_by_tag_name('span')
        return inner_span.get_attribute('innerHTML')
    except NoSuchElementException:
        return None
def get_bloger(self, element: WebElement):
    """Build a ``User`` from the author markup of *element*.

    The user name is the text of the first ``em`` inside *element*.  The
    user id is not extracted and is always ``None``.

    Returns:
        ``User(user_name, None)``; ``user_name`` is ``None`` when an
        ``AttributeError`` occurs during the lookup (the error is printed).
    """
    # NOTE(review): a failed Selenium lookup presumably raises
    # NoSuchElementException rather than AttributeError -- confirm whether
    # this handler can actually fire.
    user_id = None
    try:
        user_name = element.find_element_by_tag_name("em").text
    except AttributeError as err:
        print(err)
        user_name = None
    return User(user_name, user_id)
def get_xzw_height(c_main: WebElement) -> int:
    """Sum the heights of the three stacked sections inside *c_main*.

    Page: https://www.xzw.com/fortune/cancer/

    The sections are the element with class ``top``, the first ``dl`` tag and
    the element with class ``c_cont``.

    Returns:
        int: the sum of the three ``size["height"]`` values.
    """
    lookups = (
        (c_main.find_element_by_class_name, "top"),
        (c_main.find_element_by_tag_name, "dl"),
        (c_main.find_element_by_class_name, "c_cont"),
    )
    total = 0
    for find, key in lookups:
        total += find(key).size["height"]
    return total
def extract_contact_information(col: WebElement) -> ContactInfo:
    """Build a ``ContactInfo`` from a table column.

    The e-mail is the stripped text of the first ``span`` inside *col*.  The
    phone number is the first line of the column text; when the text has no
    line break the phone number is left empty.

    Args:
        col: Column element holding the contact details.

    Returns:
        ``ContactInfo(phone_number, email, full_info)`` where ``full_info``
        is the unmodified column text.
    """
    full_info = col.text
    email = col.find_element_by_tag_name('span').text.strip()

    # partition() reports a missing newline through an empty separator, so
    # no exception handling is needed for the "single line" case.
    first_line, newline, _ = full_info.partition('\n')
    phone_number = first_line.strip() if newline else ''
    return ContactInfo(phone_number, email, full_info)
def parse_sectors_of_activity(field: WebElement) -> List[str]:
    """Return the comma-separated sectors of activity listed in *field*.

    The text of the first ``div`` inside *field* is split on commas and the
    pieces are passed through ``_remove_trailing_comma_element`` and
    ``_strip``.

    Args:
        field: Element containing the ``div`` with the sector list.

    Returns:
        The list of sectors, or ``[]`` when the field cannot be read or
        processed.
    """
    try:
        sectors = field.find_element_by_tag_name('div').text.strip()
        # Helpers presumably drop the empty item left by a trailing comma and
        # strip whitespace from each item -- verify against their definitions.
        return _strip(_remove_trailing_comma_element(sectors.split(',')))
    except Exception:
        # Best effort: any failure is reported as "no sectors".
        return []
def parse_countries_where_active(field: WebElement) -> List[str]:
    """Return the comma-separated countries listed in *field*.

    The text of the first ``div`` inside *field* is split on commas and the
    pieces are passed through ``_remove_trailing_comma_element`` and
    ``_strip``.

    Args:
        field: Element containing the ``div`` with the country list.

    Returns:
        The list of countries, or ``[]`` when the field cannot be read or
        processed.
    """
    try:
        countries = field.find_element_by_tag_name('div').text.strip()
        # Helpers presumably drop the empty item left by a trailing comma and
        # strip whitespace from each item -- verify against their definitions.
        return _strip(_remove_trailing_comma_element(countries.split(',')))
    except Exception:
        # Best effort: any failure is reported as "no countries".
        return []
def assert_css_loaded(we: WebElement) -> None:
    """Assert that the Myaku web CSS stylesheet has been applied.

    The check compares the computed ``font-family`` of the ``body`` element
    with ``MYAKUWEB_FONT_FAMILY``; a match is taken as evidence that the
    stylesheet loaded.

    Args:
        we: WebElement containing the ``body`` to inspect.

    Raises:
        AssertionError: if the font family differs from the expected one.
    """
    actual_font_family = (
        we.find_element_by_tag_name('body').value_of_css_property('font-family')
    )
    assert actual_font_family == MYAKUWEB_FONT_FAMILY
def get_shadow_element_from(self, from_object: WebElement, selector: str) -> WebElement:
    """Return the shadow root of the first *selector* tag inside *from_object*.

    Args:
        from_object: Element to search in.
        selector: Tag name of the shadow host element.

    Returns:
        The value of the host's ``shadowRoot`` as returned by the driver's
        ``execute_script``, or ``None`` when the host element could not be
        located (a message is printed in that case).
    """
    try:
        shadow_parent = from_object.find_element_by_tag_name(selector)
    except NoSuchElementException:
        print("NoSuchElementException")
        return None
    except Exception:
        print("Unexpected error:", sys.exc_info()[0])
        return None

    # Without the early returns above, a failed lookup would reach this line
    # with shadow_parent unbound and raise UnboundLocalError.
    return self.driver.execute_script(
        "return arguments[0].shadowRoot", shadow_parent)
def extract_representative(col: WebElement) -> Representative:
    """Build a ``Representative`` from a table column.

    The e-mail is the stripped text of the first ``span`` inside *col*.  The
    name is the first line of the column text; when the text has no line
    break the name is left empty.

    Args:
        col: Column element holding the representative's details.

    Returns:
        ``Representative(name, email, full_info)`` where ``full_info`` is the
        unmodified column text.
    """
    full_info = col.text
    email = col.find_element_by_tag_name('span').text.strip()

    # partition() reports a missing newline through an empty separator, so
    # no exception handling is needed for the "single line" case.
    first_line, newline, _ = full_info.partition('\n')
    name = first_line.strip() if newline else ''
    return Representative(name, email, full_info)
def __assert_csv_table(self, table: WebElement, expects: dict) -> None:
    """Check a rendered table against expected header and body texts.

    Args:
        table: The table element; must be truthy and contain ``thead`` header
            cells and a ``tbody``.
        expects: May contain ``'header'`` (list of header texts) and
            ``'body'`` (list of rows, each a list of cell texts).  A missing
            or empty entry skips that comparison and logs a message instead.

    Each header cell / body cell found in the table is compared with the
    expected value at the same index.
    """
    self.assertTrue(table)
    wanted_header = expects.get('header', [])
    wanted_body = expects.get('body', [])

    header_selector = 'thead > tr > th'
    self.assertTrue(table.find_elements_by_css_selector(header_selector))
    header_cells = table.find_elements_by_css_selector(header_selector)
    self.assertTrue(table.find_element_by_tag_name('tbody'))
    tbody = table.find_element_by_tag_name('tbody')

    if not wanted_header:
        self.__logger.info(f'{__name__}: header is no check.')
    else:
        for col_idx, cell in enumerate(header_cells):
            self.assertEqual(wanted_header[col_idx], cell.text)

    if not wanted_body:
        self.__logger.info(f'{__name__}: body is no check.')
    else:
        for row_idx, tr in enumerate(tbody.find_elements_by_tag_name('tr')):
            wanted_row = wanted_body[row_idx]
            for col_idx, cell in enumerate(tr.find_elements_by_tag_name('td')):
                self.assertEqual(wanted_row[col_idx], cell.text)
def parse_type_of_organization(field: WebElement) -> List[str]:
    """Return the comma-separated organization types listed in *field*.

    The text of the first ``div`` inside *field* is split on commas and the
    pieces are passed through ``_remove_trailing_comma_element`` and
    ``_strip``.

    Args:
        field: Element containing the ``div`` with the type list.

    Returns:
        A new list with the organization types, or ``[]`` when the field
        cannot be read or processed.
    """
    try:
        types = field.find_element_by_tag_name('div').text.strip()
        # Helpers presumably drop the empty item left by a trailing comma and
        # strip whitespace from each item -- verify against their definitions.
        cleaned = _strip(_remove_trailing_comma_element(types.split(',')))
        # list() makes the same copy the old element-by-element loop did,
        # without shadowing the builtin ``type``.
        return list(cleaned)
    except Exception:
        # Best effort: any failure is reported as "no types".
        return []
def get_menu_item(ele: WebElement) -> MenuItem:
    """Recursively convert a menu element into a ``MenuItem`` tree.

    The title and URL come from the first ``a`` inside *ele*; every element
    matched by the CSS selector ``ul > li`` under *ele* is converted the same
    way and appended to the item's ``nodes``.

    :param ele: menu entry element containing an ``a`` link
    :return: the populated ``MenuItem``
    """
    # NOTE(review): "ul > li" is evaluated relative to ele, so it may also
    # match <li> elements of deeper nested lists -- confirm this is intended.
    anchor = ele.find_element_by_tag_name('a')
    menu_item = MenuItem(anchor.text, anchor.get_attribute('href'))
    for child in ele.find_elements_by_css_selector("ul > li"):
        menu_item.nodes.append(get_menu_item(child))
    return menu_item
def _get_feed_post_media(post: WebElement):
    """
    Get the media source URL of a feed post.

    Tries the image matched by ``div:nth-child(2) img`` first and falls back
    to the first ``video`` tag.

    :param post: WebElement of the post taken from the feed
    :return: the ``src`` attribute of the image or video, or ``False`` when
        neither element exists
    """
    lookups = (
        (post.find_element_by_css_selector, 'div:nth-child(2) img'),
        (post.find_element_by_tag_name, 'video'),
    )
    for find, query in lookups:
        try:
            return find(query).get_attribute('src')
        except excp.NoSuchElementException:
            continue
    return False
def parse_hq_country(field: WebElement) -> Optional[str]:
    """Extract the country from a free-form headquarters address.

    The country is assumed to be the last segment of the address.  Addresses
    on the site use different separators, for example::

        158 Jan Smuts Avenue Building, Rosebank, Johannesburg | South Africa
        10 Fawcett St, Suite 204 | Cambridge, MA 02138 | USA
        1904 Harbor Boulevard #831, Costa Mesa, CA 92627 ÷ USA
        40 Worth Street, Suite 303 New York, NY, 10013, USA
        ACTED / 33 rue Godot de Mauroy / 75009 Paris / France  (one per line)

    Args:
        field: Element containing the ``div`` with the address text.

    Returns:
        The stripped text after the last occurrence of the first separator
        found, trying ``'|'``, ``'÷'``, newline and ``','`` in that order; the
        whole stripped address when none of them occurs.

    Raises:
        Whatever ``find_element_by_tag_name`` raises when the ``div`` is
        missing; the lookup is intentionally not guarded.
    """
    address = field.find_element_by_tag_name('div').text.strip()

    # str.split never raises, so fallbacks must be chosen by testing for the
    # separator.  Explicit separators go first; the comma is last because it
    # also appears inside street addresses.
    for separator in ('|', '÷', '\n', ','):
        if separator in address:
            return address.rsplit(separator, 1)[-1].strip()
    return address
def _wish_birthday(person_element: WebElement, message: str) -> bool:
    """Post a birthday wish to the person if we haven't already done so.

    Types *message* into the person's ``textarea`` and submits the ``Post``
    input, pausing with ``random_user_delay()`` after each step.

    Returns:
        ``True`` once the wish was submitted; ``False`` when the controls are
        not interactable/visible, which is treated as "already posted".
    """
    try:
        print("Get text box")
        person_element.find_element_by_tag_name("textarea").send_keys(message)
        random_user_delay()

        print("Post message")
        person_element.find_element_by_xpath('.//input[@value="Post"]').submit()
        print(message)
        random_user_delay()
    except (ElementNotInteractableException, ElementNotVisibleException):
        print("Already posted a wish")
        return False
    return True
def getElementByTagName(self, tagname, element: WebElement = None, maxTry=30, waitPerTry=1, raiseException: bool = True):
    """Find an element by tag name, retrying until it appears.

    Args:
        tagname: Tag name to look for.
        element: Element to search in; the driver is searched when ``None``.
        maxTry: Maximum number of attempts.  At least one attempt is always
            made, even for values below 1.
        waitPerTry: Value passed to ``self.sleep`` between attempts.
        raiseException: When true, re-raise the last lookup error once the
            attempts are used up; otherwise return ``None``.

    Returns:
        The element found, or ``None`` when the attempts are used up with
        ``raiseException`` false or when ``self.sleep`` returns a truthy
        value (which ends the retry loop).
    """
    stop = False
    while not stop:
        try:
            if element is not None:
                return element.find_element_by_tag_name(tagname)
            return self.driver.find_element_by_tag_name(tagname)
        except Exception:
            maxTry -= 1
            # "<=" rather than "==": a non-positive maxTry must not skip past
            # zero and retry forever.
            if maxTry <= 0:
                if raiseException:
                    raise
                break
        stop = self.sleep(waitPerTry)
    return None
def parse_representative(field: WebElement) -> Optional[Email]:
    """Extract the representative's e-mail address from *field*.

    The text of the first ``div`` inside *field* is expected to hold one item
    per line (title and name, job title, e-mail, phone number), e.g.::

        Ms Jane Doe
        Information and Communication Manager
        jane.doe@example.org
        Tel + 33 (0)1 42 65 61 40

    Args:
        field: Element containing the ``div`` with the representative text.

    Returns:
        The stripped line containing ``'@'`` (the last one if several match),
        or ``None`` when there is none or the field cannot be read.
    """
    # Bound before the lookup so a failure returns None instead of raising
    # UnboundLocalError at the final return.
    email = None
    try:
        rep = field.find_element_by_tag_name('div').text.strip()
    except Exception:
        return email

    for part in rep.split('\n'):
        if '@' in part:
            email = part.strip()
    return email
def get_emoji(element: WebElement):
    """Return the ``innerHTML`` of the span nested inside *element*'s span.

    Descends two levels of ``span`` tags, starting at *element*.  Returns
    ``None`` when a lookup raises ``NoSuchElementException``.
    """
    try:
        node = element
        for _ in range(2):
            node = node.find_element_by_tag_name('span')
        return node.get_attribute('innerHTML')
    except NoSuchElementException:
        return None
def get_date(article: WebElement):
    """Return the ``title`` attribute of the first ``abbr`` tag in *article*."""
    abbr = article.find_element_by_tag_name('abbr')
    return abbr.get_attribute('title')