示例#1
0
    def validate_and_extract(self, node: bs4.element):
        """Return an EPinterestPin built from a Pinterest embed anchor.

        A node qualifies when it is an ``a`` tag whose ``data-pin-do``
        attribute equals ``embedPin`` and which also carries an ``href``.
        The pin is constructed from that ``href``; any other node
        yields ``None``.
        """
        if node.name != 'a':
            return None
        if not node.has_attr('data-pin-do'):
            return None
        if node['data-pin-do'] != 'embedPin':
            return None
        if not node.has_attr('href'):
            return None

        return EPinterestPin(node['href'])
    def validate_and_extract(self, node: bs4.element):
        """Return an EYouTubeVideo for a YouTube embed iframe, else None.

        The node must be a ``bs4.element.Tag`` named ``iframe`` whose
        ``src`` starts with ``https://www.youtube.com/embed/``.  The
        video id is derived from that ``src`` by the class's private
        ``__get_youtube_video_id`` helper.
        """
        if not isinstance(node, bs4.element.Tag):
            return None
        if node.name != 'iframe' or not node.has_attr('src'):
            return None

        embed_url = node['src']
        if not embed_url.startswith('https://www.youtube.com/embed/'):
            return None

        return EYouTubeVideo(self.__get_youtube_video_id(embed_url))
    def validate_and_extract(self, node: bs4.element):
        """Return an EYouTubeVideo for a YouTube embed iframe, else None.

        The node must be named ``iframe``, have a ``src`` attribute, and
        that ``src`` must be accepted by ``utils.has_domain`` for the
        YouTube embed pattern.  The video id is derived from the ``src``
        by the class's private ``__get_youtube_video_id`` helper.
        """
        is_iframe_with_src = node.name == 'iframe' and node.has_attr('src')

        # ``and`` short-circuits, so ``src`` is only read once it is known
        # to be present.
        if is_iframe_with_src and utils.has_domain(
                node['src'], r'^https://www\.youtube\.com/embed'):
            video_id = self.__get_youtube_video_id(node['src'])
            return EYouTubeVideo(video_id)

        return None
示例#4
0
    def validate_and_extract(self, node: bs4.element):
        """Return an EInstagramPost for an Instagram embed node, else None.

        Two kinds of markup are recognised:

        * a tag whose ``class`` contains ``instagram-media`` or
          ``instagram-media-rendered``; the shortcode is taken from the
          ``href`` of the first ``a`` found inside it;
        * an ``iframe`` whose ``src`` starts with
          ``https://instagram.com/``; the shortcode is taken from ``src``.

        ``None`` is returned when the node is not a ``bs4.element.Tag``,
        when it matches neither form, or when a class-marked tag has no
        usable anchor and is not a matching iframe either.
        """
        if not isinstance(node, bs4.element.Tag):
            return None

        if node.has_attr('class') \
                and ('instagram-media' in node['class']
                     or 'instagram-media-rendered' in node['class']):
            anchor = node.find('a')
            # find() returns None when there is no <a> descendant, and an
            # anchor may lack href; skip instead of raising so the iframe
            # check below still gets a chance.
            if anchor is not None and anchor.has_attr('href'):
                return EInstagramPost(
                    self.__get_instagram_shortcode(anchor['href']))

        if node.name == 'iframe' \
                and node.has_attr('src') \
                and node['src'].startswith('https://instagram.com/'):
            return EInstagramPost(
                self.__get_instagram_shortcode(node['src']))

        return None
示例#5
0
    def validate_and_extract(self, node: bs4.element):
        """Build an image element from an ``img`` or ``figure`` node.

        An ``img`` with a ``src`` is passed directly to the class's
        private ``__create_image`` helper.  For a ``figure``, the first
        nested ``img`` (if it has a ``src``) is passed together with the
        result of looking up a ``figcaption`` inside the figure.  Any
        other node yields ``None``.
        """
        tag_name = node.name

        if tag_name == 'img':
            if node.has_attr('src'):
                return self.__create_image(node)
        elif tag_name == 'figure':
            inner_img = node.find('img')
            if inner_img and inner_img.has_attr('src'):
                caption = node.find('figcaption')
                return self.__create_image(inner_img, caption)

        return None
示例#6
0
    def validate_and_extract(self, node: bs4.element):
        """Return a Quote for a ``q`` node with text, else None.

        The node must be named ``q`` and ``utils.empty_text`` must be
        falsy for its text.  The Quote receives the node text and the
        value of the ``cite`` attribute, or ``None`` when the attribute
        is absent.
        """
        if node.name != 'q' or utils.empty_text(node.text):
            return None

        source = node['cite'] if node.has_attr('cite') else None
        return Quote(node.text, source)
示例#7
0
    def validate_and_extract(self, node: bs4.element):
        """Return a Video for a ``video`` or video ``embed`` node, else None.

        For a ``video`` node the URL list is its own ``src`` when present;
        otherwise it is the ``src`` of every direct ``source`` child that
        has one.  A ``video`` with no URLs yields ``None``.

        For an ``embed`` node, its ``src`` must be matched by
        ``VIDEO_EXTENSIONS_PATTERN``; the URL list is that single ``src``.

        In both cases height and width come from
        ``e_utils.get_media_size(node)``.
        """
        tag_name = node.name

        if tag_name == 'video':
            if node.has_attr('src'):
                # An explicit src on the tag wins; children are ignored.
                sources = [node['src']]
            else:
                sources = [
                    child['src']
                    for child in node.contents
                    if child.name == 'source' and child.has_attr('src')
                ]
            if not sources:
                return None
            height, width = e_utils.get_media_size(node)
            return Video(sources, height, width)

        if tag_name == 'embed' \
                and node.has_attr('src') \
                and VIDEO_EXTENSIONS_PATTERN.match(node['src']):
            height, width = e_utils.get_media_size(node)
            return Video([node['src']], height, width)

        return None
示例#8
0
    def validate_and_extract(self, node: bs4.element):
        """Return an ETweet for an embedded-tweet node, else None.

        The node must be a ``bs4.element.Tag`` whose ``class`` contains
        ``twitter-tweet`` or ``twitter-tweet-rendered``.  The tweet id is
        read from the ``href`` of the last ``a`` inside the node: the
        text after the final ``/``, cut at the first ``?``.
        """
        if not isinstance(node, bs4.element.Tag):
            return None
        if not node.has_attr('class'):
            return None

        classes = node['class']
        if 'twitter-tweet' not in classes \
                and 'twitter-tweet-rendered' not in classes:
            return None

        anchors = node.find_all('a')
        if not anchors:
            return None

        # The last anchor is the one used as the link to the tweet.
        last_anchor = anchors[-1]
        if not last_anchor.has_attr('href'):
            return None

        last_segment = last_anchor['href'].rsplit('/', 1)[-1]
        tweet_id = last_segment.partition('?')[0]
        return ETweet(tweet_id)