Exemplo n.º 1
0
    def test_core_class_vs_method_settings(self):
        """Check that changing ``open_graph`` on the instance affects later fetches.

        The same URL is fetched twice through one ``Lassie`` instance: once
        with the instance as constructed, and once after ``open_graph`` has
        been set to ``False`` on it.
        """
        page_url = "http://lassie.it/core/class_vs_method_settings.html"
        fetcher = Lassie()

        # First pass: instance left as constructed, exactly one image expected.
        images_before = fetcher.fetch(page_url)["images"]
        self.assertEqual(len(images_before), 1)

        # Second pass: open_graph is set to False, so the image list must be empty this time.
        fetcher.open_graph = False
        images_after = fetcher.fetch(page_url)["images"]
        self.assertEqual(len(images_after), 0)
Exemplo n.º 2
0
    def test_core_class_vs_method_settings(self):
        """Fetch one URL twice, toggling ``open_graph`` on the instance in between.

        One image is expected from the first fetch and none from the second,
        which runs after ``open_graph`` has been set to ``False``.
        """
        target = 'http://lassie.it/core/class_vs_method_settings.html'
        client = Lassie()

        first = client.fetch(target)
        self.assertEqual(len(first['images']), 1)

        client.open_graph = False
        second = client.fetch(target)

        # With open_graph disabled on the instance there shouldn't be any images in the list.
        self.assertEqual(len(second['images']), 0)
Exemplo n.º 3
0
    def test_no_html_tag(self):
        """The title fetched for the ``no_html_tag`` page must contain ``'no_html_tag'``."""
        url = 'http://lassie.it/core/no_html_tag.html'

        fetcher = Lassie()
        data = fetcher.fetch(url)

        # assertIn reports both operands on failure; assertTrue(x in y) only says "False is not true".
        self.assertIn('no_html_tag', data['title'])
Exemplo n.º 4
0
    def test_str_image(self):
        """The AMP ``str_image`` page must yield exactly one image."""
        fetcher = Lassie()
        result = fetcher.fetch('http://lassie.it/amp/str_image.html')

        image_count = len(result['images'])
        self.assertEqual(1, image_count)
Exemplo n.º 5
0
    def test_bad_json(self):
        """Fetching the AMP ``bad_json`` page must still return a ``url`` containing ``'amp'``."""
        url = 'http://lassie.it/amp/bad_json.html'

        fetcher = Lassie()
        data = fetcher.fetch(url)

        # assertIn shows the actual url on failure, which assertTrue(x in y) does not.
        self.assertIn('amp', data['url'])
Exemplo n.º 6
0
    def test_video_objects(self):
        """The AMP ``video_objects`` page must yield exactly one video."""
        fetcher = Lassie()
        result = fetcher.fetch('http://lassie.it/amp/video_objects.html')

        video_count = len(result['videos'])
        self.assertEqual(1, video_count)
Exemplo n.º 7
0
    def test_list_thumbnail_image(self):
        """The AMP ``list_thumbnail_image`` page must yield exactly two images."""
        fetcher = Lassie()
        result = fetcher.fetch('http://lassie.it/amp/list_thumbnail_image.html')

        image_count = len(result['images'])
        self.assertEqual(2, image_count)
Exemplo n.º 8
0
    def test_bad_url(self):
        """Fetching the ``bad_url_123456`` oEmbed URL must leave ``oembed`` absent or ``None``."""
        fetcher = Lassie()
        result = fetcher.fetch('http://lassie.it/youtube/bad_url_123456.json')

        # .get() is used so that a missing key counts the same as an explicit None.
        oembed = result.get('oembed')
        self.assertIsNone(oembed)
Exemplo n.º 9
0
    def test_no_html_tag(self):
        """Check that ``'no_html_tag'`` appears in the title fetched for the ``no_html_tag`` page."""
        url = 'http://lassie.it/core/no_html_tag.html'

        client = Lassie()
        data = client.fetch(url)

        # assertIn prints the needle and the actual title when the check fails.
        self.assertIn('no_html_tag', data['title'])
Exemplo n.º 10
0
    def test_list_json(self):
        """The description fetched for the AMP ``list_json`` page must mention ``'Pixar'``."""
        url = 'http://lassie.it/amp/list_json.html'

        fetcher = Lassie()
        data = fetcher.fetch(url)

        # assertIn shows the actual description on failure, unlike assertTrue(x in y).
        self.assertIn('Pixar', data['description'])
Exemplo n.º 11
0
    def test_list_image_empty(self):
        """The AMP ``list_image_empty`` page must yield exactly one image."""
        fetcher = Lassie()
        result = fetcher.fetch('http://lassie.it/amp/list_image_empty.html')

        image_count = len(result['images'])
        self.assertEqual(1, image_count)
Exemplo n.º 12
0
    def test_youtube_good(self):
        """The ``youtube/good.json`` URL must yield one video and one image."""
        fetcher = Lassie()
        result = fetcher.fetch('http://lassie.it/youtube/good.json')

        # Videos are checked before images, one entry expected in each list.
        for key in ('videos', 'images'):
            self.assertEqual(len(result[key]), 1)
Exemplo n.º 13
0
    def test_core_class_setting_is_none(self):
        """A per-call ``open_graph=False`` must yield no images when the instance attribute is ``None``."""
        # Setting the class attr to None is a really odd use-case, but it might happen.
        fetcher = Lassie()
        fetcher.open_graph = None

        result = fetcher.fetch("http://lassie.it/core/class_setting_is_none.html", open_graph=False)

        self.assertEqual(len(result["images"]), 0)
Exemplo n.º 14
0
    def test_core_class_setting_is_none(self):
        """Fetch with ``open_graph=False`` on an instance whose ``open_graph`` is ``None``; expect no images."""
        target = 'http://lassie.it/core/class_setting_is_none.html'

        client = Lassie()
        # Odd use-case: the attribute is explicitly set to None on the instance, but it might happen.
        client.open_graph = None

        images = client.fetch(target, open_graph=False)['images']
        self.assertEqual(len(images), 0)
Exemplo n.º 15
0
    def test_all_properites(self):
        """Fetch the AMP ``all_properties`` page with ``all_images=True``; check image count and title.

        NOTE(review): the method name is misspelled ("properites"); it is left
        unchanged here so that the test keeps its existing identifier.
        """
        expected_title = 'Google Glass Is Dead, Long Live Snapchat Spectacles'

        fetcher = Lassie()
        result = fetcher.fetch('http://lassie.it/amp/all_properties.html', all_images=True)

        self.assertEqual(len(result['images']), 3)
        self.assertEqual(result['title'], expected_title)
Exemplo n.º 16
0
    def test_bad_image_dimensions(self):
        """The first image of the ``bad_image_dimensions`` page must carry neither width nor height."""
        url = "http://lassie.it/core/bad_image_dimensions.html"

        fetcher = Lassie()
        data = fetcher.fetch(url, all_images=True)

        # lassie.utils.convert_to_int catches a TypeError or ValueError and passes,
        # so no width/height is set on the image.
        image = data["images"][0]

        # assertNotIn reports the key and the container on failure, unlike assertTrue(not x in y).
        self.assertNotIn("width", image)
        self.assertNotIn("height", image)
Exemplo n.º 17
0
    def test_bad_image_dimensions(self):
        """Check that the first fetched image has no ``width`` and no ``height`` key."""
        url = 'http://lassie.it/core/bad_image_dimensions.html'

        client = Lassie()
        data = client.fetch(url, all_images=True)

        # lassie.utils.convert_to_int catches a TypeError or ValueError and passes,
        # so no width/height is set on the image.
        image = data['images'][0]

        # assertNotIn gives a descriptive failure message; assertTrue(not x in y) does not.
        self.assertNotIn('width', image)
        self.assertNotIn('height', image)
Exemplo n.º 18
0
    def test_core_retrieve_all_images(self):
        """With ``all_images`` enabled on the instance, three images come back and the third is 550x365."""
        fetcher = Lassie()
        fetcher.all_images = True

        images = fetcher.fetch('http://lassie.it/core/retrieve_all_images.html')['images']
        self.assertEqual(len(images), 3)

        # Only the dimensions of the final image are checked.
        third = images[2]
        self.assertEqual(third['width'], 550)
        self.assertEqual(third['height'], 365)
Exemplo n.º 19
0
    def test_core_retrieve_all_images(self):
        """Enable ``all_images`` on the instance, then check the image count and the last image's size."""
        client = Lassie()
        client.all_images = True

        result = client.fetch('http://lassie.it/core/retrieve_all_images.html')
        self.assertEqual(len(result['images']), 3)

        # Width is checked before height, both on the image at index 2.
        final_image = result['images'][2]
        for key, expected in (('width', 550), ('height', 365)):
            self.assertEqual(final_image[key], expected)
Exemplo n.º 20
0
    def test_core_retrieve_all_images(self):
        """Three images are expected once ``all_images`` is set on the instance; the third must be 550x365."""
        page_url = "http://lassie.it/core/retrieve_all_images.html"

        fetcher = Lassie()
        fetcher.all_images = True
        images = fetcher.fetch(page_url)["images"]

        self.assertEqual(len(images), 3)

        # Dimensions are verified for the image at index 2 only.
        third = images[2]
        self.assertEqual(third["width"], 550)
        self.assertEqual(third["height"], 365)
Exemplo n.º 21
0
    def test_image_dimensions(self):
        """Each of the four images fetched with ``all_images=True`` must be 100x100."""
        fetcher = Lassie()
        images = fetcher.fetch('http://lassie.it/core/image_dimensions.html', all_images=True)['images']

        self.assertEqual(len(images), 4)

        # The length check above means this walks exactly indices 0-3, in order,
        # checking width before height for each image.
        for image in images:
            self.assertEqual(image['width'], 100)
            self.assertEqual(image['height'], 100)
Exemplo n.º 22
0
    def test_image_dimensions(self):
        """Fetch the ``image_dimensions`` page with ``all_images=True``; expect four 100x100 images."""
        target = 'http://lassie.it/core/image_dimensions.html'

        client = Lassie()
        result = client.fetch(target, all_images=True)

        self.assertEqual(len(result['images']), 4)

        # Exactly four entries are present at this point, so indices 0-3 are
        # visited in order, with width checked before height.
        for index in range(4):
            current = result['images'][index]
            self.assertEqual(current['width'], 100)
            self.assertEqual(current['height'], 100)
Exemplo n.º 23
0
    def test_image_dimensions(self):
        """All four images returned for the ``image_dimensions`` page must report width and height of 100."""
        page_url = "http://lassie.it/core/image_dimensions.html"

        fetcher = Lassie()
        images = fetcher.fetch(page_url, all_images=True)["images"]

        self.assertEqual(len(images), 4)

        # Same checks for every image, in list order: width first, then height.
        for image in images:
            self.assertEqual(image["width"], 100)
            self.assertEqual(image["height"], 100)
Exemplo n.º 24
0
def get_page_info(url: str, timeout: int = 4) -> Optional[PageInfo]:
    """Return information about the page located at the given address, or None.

    None is returned when ``url`` is empty, when the fetch raises
    ``LassieError``, or when the reported status code is not 200.

    :param url: Address of the page to inspect.
    :param timeout: Connection timeout, passed to Lassie as the
        ``timeout`` request option.

    """
    if not url:
        return None

    fetcher = Lassie()
    fetcher.request_opts = {'timeout': timeout}

    try:
        page = fetcher.fetch(url, touch_icon=False, favicon=False)
    except LassieError:
        # LassieError wraps requests exceptions, including
        # connection errors, timeouts and the like.
        return None

    if page['status_code'] != 200:
        return None

    return PageInfo(
        title=page.get('title', ''),
        description=page.get('description', ''),
        site_name=page.get('site_name', ''),
        images=page['images'],
    )
Exemplo n.º 25
0
    # Probe the URL with a HEAD request and store the response status on the entry.
    try:
        resp = webclient.head(url, timeout=10, headers={'User-Agent': user_agent})
        b['status'] = resp.status_code
    except Exception as err:
        # Any failure of the request is reported and the entry is skipped:
        # it ends up in neither `bookmarks` nor `not_ok`.
        print('Request failed: {}'.format(err))
        continue
    # Entries whose status is not 200 are collected separately and skipped.
    if b['status'] != 200:
        not_ok.append(b)
        continue

    # Follow redirects one hop.
    # NOTE(review): non-200 responses were skipped just above, so a redirect
    # response may never reach this point — confirm against webclient's behavior.
    if resp.is_redirect:
        url = resp.headers['Location']

    # Only responses whose content-type header starts with text/html are
    # fetched through `l` to refresh the entry's title and url.
    if resp.headers.get('content-type', '').startswith('text/html'):
        try:
            summary = l.fetch(url)
            b['title'] = summary['title'].strip()
            b['url'] = summary['url'].strip()
        except Exception as err:
            # Best effort: the failure is printed and the entry is still kept below.
            print('Fetching {} failed with error:\n{}'.format(url, err))

    bookmarks.append(b)

# Store the kept entries under the 'bookmarks' key of the data being written out.
data['bookmarks'] = bookmarks

# The result goes to a new file named after the input file with a 'cleaned_' prefix.
with open('cleaned_' + args.bmfile, 'w') as f:
    json.dump(data, f)

# Entries that did not answer with status 200 are dumped to their own file.
with open('not_ok.json', 'w') as f:
    json.dump(not_ok, f)
Exemplo n.º 26
0
    def test_core_bad_keywords(self):
        """The ``bad_keywords`` page must produce an empty ``keywords`` list."""
        fetcher = Lassie()
        result = fetcher.fetch('http://lassie.it/core/bad_keywords.html')

        # .get() returns None for a missing key, which would not equal [] and so fails the test.
        keywords = result.get('keywords')
        self.assertEqual(keywords, [])
Exemplo n.º 27
0
    def test_core_bad_keywords(self):
        """Fetch the ``bad_keywords`` page and expect ``keywords`` to be an empty list."""
        target = 'http://lassie.it/core/bad_keywords.html'

        client = Lassie()
        found = client.fetch(target).get('keywords')

        self.assertEqual(found, [])
Exemplo n.º 28
0
    def test_youtube_no_type(self):
        """Smoke test: fetching the ``youtube/no_type.json`` URL must not raise.

        There are no assertions on the result; the test passes as long as
        ``fetch`` returns.
        """
        url = 'http://lassie.it/youtube/no_type.json'

        fetcher = Lassie()
        # The return value is deliberately discarded: only "does not raise" is checked.
        fetcher.fetch(url)
Exemplo n.º 29
0
    def test_youtube_bad_html(self):
        """Smoke test: fetching the ``youtube/bad_html.json`` URL must not raise.

        There are no assertions on the result; the test passes as long as
        ``fetch`` returns.
        """
        url = 'http://lassie.it/youtube/bad_html.json'

        fetcher = Lassie()
        # The return value is deliberately discarded: only "does not raise" is checked.
        fetcher.fetch(url)
Exemplo n.º 30
0
# `sample` is assigned before this fragment; show it both raw and pretty-printed.
print(sample)
pprint(sample)

# Visual separator between the individual demos.
print("*" * 100)

# Module-level fetch, this time passing all_images=True.
sample = lassie.fetch('https://www.youtube.com/watch?v=R6IT_f0XPT8',
                      all_images=True)
print(sample)
pprint(sample)

print("*" * 100)

# Same URL again, now through an explicit Lassie instance.
from lassie import Lassie

l = Lassie()
sample = l.fetch('https://www.youtube.com/watch?v=R6IT_f0XPT8')

print(sample)
pprint(sample)

print("*" * 100)

# Request options on the instance: first a custom User-Agent header...
l.request_opts = {
    'headers': {
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) '
        'Version/12.1.1 Safari/605.1.15 '
    }
}

# ...then a timeout.
# NOTE(review): request_opts is assigned twice in a row; whether this second
# assignment discards the headers set above depends on how Lassie implements
# the attribute — confirm.
l.request_opts = {'timeout': 0.1}