Python soup示例，tests.test_extraction.unit_test_utils.soup Python示例

示例#1

0

显示文件

"""
    This script is for unit testing of image_extractor
    Use pytest to run this script
    Command to run: /stampify$ python -m pytest
"""
import pytest

from data_models.image import Image
from extraction.content_extractors import image_extractor
from tests.test_extraction import unit_test_utils as test_utils

# Extractor instance under test.
__EXTRACTOR = image_extractor.ImageExtractor()

# Fixture returned by the shared test helper for 'image.html'.
# NOTE(review): the find(...) calls used elsewhere in these tests suggest a
# BeautifulSoup tree -- confirm in tests/test_extraction/unit_test_utils.
__soup = test_utils.soup('image.html')

# Expected Image objects.  All arguments are positional; their meaning is
# defined by data_models.image.Image, which is not visible from here.
expected_output_1 = Image(
    'http://www.google.com/image_with_src_and_title.jpg',
    100,
    100,
    False,
    None,
    'This is Image with src and title!',
    '',
)

expected_output_2 = Image(
    'http://www.google.com/image_with_src_but_without_title.gif',
    0,
    0,
    True,
    None,
    None,
    '',
)

expected_output_3 = Image(
    'http://www.google.com/image_with_src_and_title_inside_figure.jpg',
    0,
    0,
    False,
    None,
    'This is Image with src and title inside figure!',
    '',
)

expected_output_4 = Image(
    'http://www.google.com/'

示例#2

0

显示文件

"""
    This script is for unit testing of embedded
    pinterest_pin extractor

    Use pytest to run this script
    Command to run: /stampify$ python -m pytest
"""
import pytest

from data_models.embedded_pinterest_pin import EPinterestPin
from extraction.content_extractors import embedded_pinterest_pin_extractor
from tests.test_extraction import unit_test_utils as test_utils

# Extractor instance under test.
__EXTRACTOR = embedded_pinterest_pin_extractor.EPinterestPinExtractor()

# Fixture returned by the shared test helper for 'pinterest_pin.html'.
# NOTE(review): looks like a BeautifulSoup tree (find(..., class_=...)) --
# confirm in tests/test_extraction/unit_test_utils.
__soup = test_utils.soup('pinterest_pin.html')

expected_output_1 = EPinterestPin(
    'https://www.pinterest.com/pin/99360735500167749/'
)

# (node looked up in the fixture, expected object) pairs.
acceptable_test_data = [
    (__soup.find('a', class_='a_tag1'), expected_output_1),
]

# Nodes paired with None: three <a> lookups by class, then the first <img>.
non_acceptable_test_data = [
    (__soup.find('a', class_=css_class), None)
    for css_class in ('a_tag2', 'a_tag3', 'a_tag4')
] + [
    (__soup.find('img'), None),
]


@pytest.mark.parametrize("input_node, expected", acceptable_test_data)
def test_tag_should_return_epinterestpin_object(input_node, expected):

示例#3

0

显示文件

文件： test_embedded_instagram_post_extractor.py 项目： telescopic/stampify

"""
    This script is for unit testing of embedded_instagram_post_extractor
    Use pytest to run this script
    Command to run: /stampify$ python -m pytest
"""
import pytest

from data_models.embedded_instagram_post import EInstagramPost
from extraction.content_extractors import embedded_instagram_post_extractor
from tests.test_extraction import unit_test_utils as test_utils

# Extractor instance under test.
__EXTRACTOR = embedded_instagram_post_extractor.EInstagramPostExtractor()

# Fixture returned by the shared test helper for 'instagram.html'.
# NOTE(review): looks like a BeautifulSoup tree (find(..., class_=...)) --
# confirm in tests/test_extraction/unit_test_utils.
__soup = test_utils.soup('instagram.html')

# Expected EInstagramPost objects, one per acceptable node below.
expected_output_1 = EInstagramPost("post_shortcode1", '')
expected_output_2 = EInstagramPost("post_shortcode2", '')
expected_output_3 = EInstagramPost("short_code1", '')

# (node looked up in the fixture, expected object) pairs.
acceptable_test_data = [
    (__soup.find('blockquote', class_='node1'), expected_output_1),
    (__soup.find('blockquote', class_='node2'), expected_output_2),
    (__soup.find('iframe', class_='iframe1'), expected_output_3),
]

non_acceptable_test_data = [(__soup.find('div').get_text(), None),
                            (__soup.find('p'), None),
                            (__soup.find('iframe', class_='iframe2'), None),

示例#4

0

显示文件

"""
    This script is for unit testing of embedded_tweet_extractor
    Use pytest to run this script
    Command to run: /stampify$ python -m pytest
"""
import pytest

from data_models.embedded_tweet import ETweet
from extraction.content_extractors import embedded_tweet_extractor
from tests.test_extraction import unit_test_utils as test_utils

# Extractor instance under test.
__EXTRACTOR = embedded_tweet_extractor.ETweetExtractor()

# Fixture returned by the shared test helper for 'embedded_tweet.html'.
# NOTE(review): looks like a BeautifulSoup tree (find(..., class_=...)) --
# confirm in tests/test_extraction/unit_test_utils.
__soup = test_utils.soup('embedded_tweet.html')

# Expected ETweet objects, one per acceptable node below.
expected_output_1 = ETweet('123456789123456789')
expected_output_2 = ETweet('987654321987654321')

# (node looked up in the fixture, expected object) pairs.
acceptable_test_data = [
    (__soup.find('blockquote', class_='twitter-tweet'), expected_output_1),
    (
        __soup.find('blockquote', class_='twitter-tweet-rendered'),
        expected_output_2,
    ),
]

# <p> lookups by class, each paired with None.
non_acceptable_test_data = [
    (__soup.find('p', class_=css_class), None)
    for css_class in ('twitter-tweet', 'p1')
]

示例#5

0

显示文件

文件： test_quote_extractor.py 项目： telescopic/stampify

"""
    This script is for unit testing of quote extractor
    Use pytest to run this script
    Command to run: /stampify$ python -m pytest
"""
import pytest

from data_models.quote import Quote
from extraction.content_extractors import quote_extractor
from tests.test_extraction import unit_test_utils as test_utils

# Extractor instance under test.
__EXTRACTOR = quote_extractor.QuoteExtractor()

# Fixture returned by the shared test helper for 'quote.html'.
# NOTE(review): looks like a BeautifulSoup tree (find(..., class_=...)) --
# confirm in tests/test_extraction/unit_test_utils.
__soup = test_utils.soup('quote.html')

# Expected Quote objects; the second one is built with None as its
# second argument.
expected_output_1 = Quote('This is a quote tag!', 'citation1')
expected_output_2 = Quote('This is another quote tag!', None)

# (node looked up in the fixture, expected object) pairs.
acceptable_test_data = [
    (__soup.find('q', class_='q_tag1'), expected_output_1),
    (__soup.find('q', class_='q_tag2'), expected_output_2),
]

# Nodes paired with None: one <q> lookup by class and the first <img>.
non_acceptable_test_data = [
    (__soup.find('q', class_='q_tag3'), None),
    (__soup.find('img'), None),
]


@pytest.mark.parametrize("input_node, expected", acceptable_test_data)
def test_tag_should_return_quote_object(input_node, expected):

示例#6

0

显示文件

"""
    This script is for unit testing of video_extractor
    Use pytest to run this script
    Command to run: /stampify$ python -m pytest
"""
import pytest

from data_models.video import Video
from extraction.content_extractors import video_extractor
from tests.test_extraction import unit_test_utils as test_utils

# Extractor instance under test.
__EXTRACTOR = video_extractor.VideoExtractor()

# Fixture returned by the shared test helper for 'video.html'.
# NOTE(review): looks like a BeautifulSoup tree (find(..., class_=...)) --
# confirm in tests/test_extraction/unit_test_utils.
__soup = test_utils.soup('video.html')

# Expected Video objects: a list of URLs followed by two integers
# (presumably width and height -- confirm against data_models.video).
expected_output_1 = Video(['http://www.google.com/video1.mp4'], 100, 100)

expected_output_2 = Video(
    [
        'http://www.google.com/movie1.mp4',
        'http://www.google.com/movie1.ogg',
    ],
    320,
    240,
)

expected_output_3 = Video(['http://www.google.com/movie1.mp4'], 320, 240)

expected_output_4 = Video(['http://www.google.com/embed_video1.mp4'], 0, 0)

# (node looked up in the fixture, expected object) pairs, built from
# (tag name, class name, expected object) rows.
acceptable_test_data = [
    (__soup.find(tag, class_=css_class), expected)
    for tag, css_class, expected in (
        ('video', 'video_node1', expected_output_1),
        ('video', 'video_node2', expected_output_2),
        ('video', 'video_node3', expected_output_3),
        ('embed', 'embed1', expected_output_4),
    )
]

示例#7

0

显示文件

"""
    This script is for unit testing of embedded youtube video
    Use pytest to run this script
    Command to run: /stampify$ python -m pytest
"""
import pytest

from data_models.embedded_youtube_video import EYouTubeVideo
from extraction.content_extractors import embedded_youtube_video_extractor
from tests.test_extraction import unit_test_utils as test_utils

# Extractor instance under test.
__EXTRACTOR = embedded_youtube_video_extractor.EYouTubeVideoExtractor()

# Fixture returned by the shared test helper for 'youtube_video.html'.
# NOTE(review): looks like a BeautifulSoup tree (find(..., class_=...)) --
# confirm in tests/test_extraction/unit_test_utils.
__soup = test_utils.soup('youtube_video.html')

expected_output_1 = EYouTubeVideo("tgbNymZ7vqY", 0, 0)

# (node looked up in the fixture, expected object) pairs.
acceptable_test_data = [
    (__soup.find('iframe', class_='iframe1'), expected_output_1),
]

# Nodes paired with None: two <iframe> lookups by class and the first <p>.
non_acceptable_test_data = [
    (node, None)
    for node in (
        __soup.find('iframe', class_='iframe2'),
        __soup.find('iframe', class_='iframe3'),
        __soup.find('p'),
    )
]


@pytest.mark.parametrize("input_node, expected", acceptable_test_data)
def test_tag_should_return_eyoutube_video_object(input_node, expected):
    actual_yt_video_content = __EXTRACTOR.validate_and_extract(input_node)

示例#8

0

显示文件

文件： test_text_extractor.py 项目： telescopic/stampify

"""
    This script is for unit testing of text_extractor
    Use pytest to run this script
    Command to run: /stampify$ python -m pytest
"""
import pytest

from data_models.text import Text
from extraction.content_extractors.text_extractor import TextExtractor
from tests.test_extraction import unit_test_utils as test_utils

# Extractor instance under test.
__EXTRACTOR = TextExtractor()

# Fixture returned by the shared test helper for 'text.html'.
# NOTE(review): looks like a BeautifulSoup tree (find(..., class_=...)) --
# confirm in tests/test_extraction/unit_test_utils.
__soup = test_utils.soup('text.html')

# Expected Text objects: text content, a tag-name string, and is_bold.
expected_output_1 = Text('This is paragraph tag.', 'p', is_bold=None)

expected_output_2 = Text('Important Tag!', 'h1', is_bold=None)

expected_output_3 = Text(
    'This is paragraph which is having strong content.',
    'p',
    is_bold=True,
)

expected_output_4 = Text('This is Navigable String.', '', is_bold=None)

# (node looked up in the fixture, expected object) pairs.
acceptable_test_data = [
    (__soup.find('p', class_='p1'), expected_output_1),
    (__soup.find('h1'), expected_output_2),
    (__soup.find('p', class_='p2'), expected_output_3),
]

# Nodes paired with None: one <p> lookup by class and the first <img>.
non_acceptable_test_data = [
    (__soup.find('p', class_='p3'), None),
    (__soup.find('img'), None),
]