def validate_and_extract(self, node: bs4.element):
    """Return an EPinterestPin for a Pinterest embed anchor, otherwise None.

    A node qualifies when it is an ``<a>`` tag whose ``data-pin-do``
    attribute equals ``'embedPin'`` and which carries an ``href``; the
    ``href`` value is what gets passed to ``EPinterestPin``.
    """
    # Checking the tag name first keeps the attribute lookups from running
    # on nodes that are not anchors.
    if node.name != 'a':
        return None

    is_embed_pin = node.has_attr('data-pin-do') \
        and node['data-pin-do'] == 'embedPin'
    if not is_embed_pin or not node.has_attr('href'):
        return None

    return EPinterestPin(node['href'])
def validate_and_extract(self, node: bs4.element):
    """Return an EYouTubeVideo for a YouTube embed iframe, otherwise None.

    The node must be a ``bs4.element.Tag`` named ``iframe`` whose ``src``
    starts with ``https://www.youtube.com/embed/``. The video id is derived
    from that ``src`` by the class's private ``__get_youtube_video_id``
    helper.
    """
    if not isinstance(node, bs4.element.Tag):
        return None
    if node.name != 'iframe' or not node.has_attr('src'):
        return None

    embed_url = node['src']
    if not embed_url.startswith('https://www.youtube.com/embed/'):
        return None

    return EYouTubeVideo(self.__get_youtube_video_id(embed_url))
def validate_and_extract(self, node: bs4.element):
    """Return an EYouTubeVideo for a YouTube embed iframe, otherwise None.

    The node must be named ``iframe`` and have a ``src`` that
    ``utils.has_domain`` accepts for the YouTube embed pattern. The video
    id is derived from that ``src`` by the class's private
    ``__get_youtube_video_id`` helper.
    """
    if node.name != 'iframe' or not node.has_attr('src'):
        return None

    embed_url = node['src']
    # NOTE(review): utils.has_domain is defined elsewhere; presumably it
    # matches the URL against the given regex - confirm.
    if not utils.has_domain(embed_url, r'^https://www\.youtube\.com/embed'):
        return None

    return EYouTubeVideo(self.__get_youtube_video_id(embed_url))
def validate_and_extract(self, node: bs4.element):
    """Validate an Instagram post tag and return its data as EInstagramPost.

    Two shapes are recognised, checked in this order:

    1. A tag whose ``class`` contains ``instagram-media`` or
       ``instagram-media-rendered`` and which has a descendant ``<a>``
       with an ``href``; the shortcode is taken from that ``href``.
    2. An ``<iframe>`` whose ``src`` starts with ``https://instagram.com/``;
       the shortcode is taken from the ``src``.

    Returns None for non-Tag nodes and for tags matching neither shape.
    """
    if not isinstance(node, bs4.element.Tag):
        return None

    if node.has_attr('class') \
            and ('instagram-media' in node['class']
                 or 'instagram-media-rendered' in node['class']):
        # find() returns None when nothing matches, so the anchor must be
        # checked before subscripting. Restricting the search to anchors
        # that have an href also avoids a KeyError on a bare <a>.
        link = node.find('a', href=True)
        if link is not None:
            return EInstagramPost(
                self.__get_instagram_shortcode(link['href']))
        # No usable anchor: fall through so a class-tagged iframe can
        # still be handled by its src below instead of crashing.

    if node.name == 'iframe' \
            and node.has_attr('src') \
            and node['src'].startswith('https://instagram.com/'):
        return EInstagramPost(
            self.__get_instagram_shortcode(node['src']))

    return None
def validate_and_extract(self, node: bs4.element):
    """Return the image built from an ``<img>`` or ``<figure>`` node, else None.

    * ``<img>`` with a ``src``: passed on its own to the class's private
      ``__create_image`` helper.
    * ``<figure>``: its first ``<img>`` descendant, if it has a ``src``, is
      passed to ``__create_image`` together with the result of looking up
      the figure's ``<figcaption>`` (which may be None).
    """
    tag_name = node.name

    if tag_name == 'img':
        if node.has_attr('src'):
            return self.__create_image(node)
        return None

    if tag_name != 'figure':
        return None

    inner_img = node.find('img')
    if not inner_img or not inner_img.has_attr('src'):
        return None

    caption = node.find('figcaption')
    return self.__create_image(inner_img, caption)
def validate_and_extract(self, node: bs4.element):
    """Return a Quote for a ``<q>`` node with usable text, otherwise None.

    The Quote receives the node's text and the value of its ``cite``
    attribute, or None when the attribute is absent.
    """
    # NOTE(review): utils.empty_text is defined elsewhere; presumably it is
    # true for blank/whitespace-only text - confirm.
    if node.name != 'q' or utils.empty_text(node.text):
        return None

    source = node['cite'] if node.has_attr('cite') else None
    return Quote(node.text, source)
def validate_and_extract(self, node: bs4.element):
    """Return a Video for a ``<video>`` or video ``<embed>`` node, else None.

    * ``<video>``: uses the tag's own ``src`` when present; otherwise
      collects the ``src`` of every direct ``<source>`` child. With no URL
      found the result is None.
    * ``<embed>``: accepted when it has a ``src`` that
      ``VIDEO_EXTENSIONS_PATTERN`` matches.

    The Video is built from the URL list plus the height and width returned
    by ``e_utils.get_media_size`` for the node.
    """
    tag_name = node.name

    if tag_name == 'video':
        if node.has_attr('src'):
            sources = [node['src']]
        else:
            # Only direct children are inspected, and only <source> tags
            # that actually carry a src contribute a URL.
            sources = [
                child['src']
                for child in node.contents
                if child.name == 'source' and child.has_attr('src')
            ]
        if not sources:
            return None
    elif tag_name == 'embed' \
            and node.has_attr('src') \
            and VIDEO_EXTENSIONS_PATTERN.match(node['src']):
        sources = [node['src']]
    else:
        return None

    height, width = e_utils.get_media_size(node)
    return Video(sources, height, width)
def validate_and_extract(self, node: bs4.element):
    """Return an ETweet for an embedded tweet node, otherwise None.

    The node must be a ``bs4.element.Tag`` whose ``class`` contains
    ``twitter-tweet`` or ``twitter-tweet-rendered``. The tweet id is the
    last path segment of the ``href`` of the last ``<a>`` inside the node,
    with any ``?query`` part removed.
    """
    if not isinstance(node, bs4.element.Tag) or not node.has_attr('class'):
        return None

    classes = node['class']
    if 'twitter-tweet' not in classes \
            and 'twitter-tweet-rendered' not in classes:
        return None

    anchors = node.find_all('a')
    if not anchors:
        return None

    # The last anchor in the node is the one whose href is used.
    last_anchor = anchors[-1]
    if not last_anchor.has_attr('href'):
        return None

    last_segment = last_anchor['href'].rsplit('/', 1)[-1]
    tweet_id = last_segment.partition('?')[0]
    return ETweet(tweet_id)