def test_link_preview_no_content_type_header(self) -> None:
    """Check that a preview is rendered from a default mocked response.

    Sends a stream message whose content is a bare URL, feeds the captured
    ``embed_links`` queue event to ``FetchLinksEmbedData``, and verifies
    that the cached preview's title and its backslash-escaped image URL
    both appear in the message's rendered content.
    """
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/"
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Denmark", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    # NOTE(review): presumably the helper's defaults yield a response with
    # no Content-Type header, as the test name suggests -- confirm.
    self.create_mock_response(url)
    with self.settings(TEST_SUITE=False):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
            cached_data = cache_get(preview_url_cache_key(url))[0]
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
            info_logs.output[0],
        )

    assert cached_data is not None
    msg = Message.objects.select_related("sender").get(id=msg_id)
    self.assertIn(cached_data.title, msg.rendered_content)
    assert cached_data.image is not None
    # The image URL is looked up in escaped form: every character other
    # than word characters and "-" is prefixed with a backslash.
    escaped_image = re.sub(r"([^\w-])", r"\\\1", cached_data.image)
    self.assertIn(escaped_image, msg.rendered_content)
def test_invalid_url(self) -> None:
    """Check that a URL whose mocked response is a 404 renders as a plain link.

    The queue event is built by hand for ``error_url``. After the worker
    consumes it, the cache entry for that URL holds ``None``, the message
    is rendered as an ordinary link, and the base ``url`` was never
    requested.
    """
    url = "http://test.org/"
    error_url = "http://test.org/x"
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish"):
        msg_id = self.send_personal_message(
            self.example_user("hamlet"),
            self.example_user("cordelia"),
            content=error_url,
        )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    event = {
        "message_id": msg_id,
        "urls": [error_url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": error_url,
    }

    self.create_mock_response(error_url, status=404)
    with self.settings(TEST_SUITE=False):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/x: ",
            info_logs.output[0],
        )

    # FIXME: Should we really cache this, especially without cache invalidation?
    cached_data = cache_get(preview_url_cache_key(error_url))[0]

    self.assertIsNone(cached_data)
    msg.refresh_from_db()
    self.assertEqual(
        '<p><a href="http://test.org/x">http://test.org/x</a></p>', msg.rendered_content
    )
    # Only error_url was part of the event, so the base URL must not be hit.
    self.assertTrue(responses.assert_call_count(url, 0))
def test_link_preview_non_html_data(self) -> None:
    """Check that an ``application/octet-stream`` response yields no preview.

    Sends a stream message containing an ``.mp3`` URL, runs the captured
    ``embed_links`` event through ``FetchLinksEmbedData``, and verifies that
    the cache entry holds ``None`` and the message renders as a plain link.
    """
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/audio.mp3"
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Denmark", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    content_type = "application/octet-stream"
    self.create_mock_response(url, content_type=content_type)

    with self.settings(TEST_SUITE=False):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
            cached_data = cache_get(preview_url_cache_key(url))[0]
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/audio.mp3: ",
            info_logs.output[0],
        )

    self.assertIsNone(cached_data)
    msg = Message.objects.select_related("sender").get(id=msg_id)
    self.assertEqual(
        ('<p><a href="http://test.org/audio.mp3">' "http://test.org/audio.mp3</a></p>"),
        msg.rendered_content,
    )
def test_link_preview_open_graph_image_missing_content(self) -> None:
    """Check handling of an ``og:image`` meta tag that has no content.

    The mocked page is ``self.open_graph_html`` with every line mentioning
    ``og:image`` replaced by an empty ``<meta property="og:image"/>`` tag.
    The cached preview must still have a title but no image, and the
    message must render as a plain link.
    """
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/foo.html"
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Denmark", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    # Keep the og:image tag but strip its content: each line that mentions
    # og:image is swapped for a tag with no content attribute.
    html = "\n".join(
        line if "og:image" not in line else '<meta property="og:image"/>'
        for line in self.open_graph_html.splitlines()
    )
    self.create_mock_response(url, body=html)
    with self.settings(TEST_SUITE=False):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
            cached_data = cache_get(preview_url_cache_key(url))[0]
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/foo.html: ",
            info_logs.output[0],
        )

    assert cached_data is not None
    self.assertIsNotNone(cached_data.title)
    self.assertIsNone(cached_data.image)
    msg = Message.objects.select_related("sender").get(id=msg_id)
    self.assertEqual(
        ('<p><a href="http://test.org/foo.html">' "http://test.org/foo.html</a></p>"),
        msg.rendered_content,
    )
def test_link_embed_data_from_cache(self) -> None:
    """Check that ``link_embed_data_from_cache`` misses, then hits.

    With nothing stored, the lookup must raise ``NotFoundInCache``. After
    the value is stored under the preview cache key in the ``"database"``
    cache (with ``CACHES`` overridden to ``TEST_CACHES``), the same lookup
    must return it.
    """
    # This method was previously defined twice with identical logic; the
    # second definition silently shadowed the first, so only one is kept.
    url = "http://test.org/"
    link_embed_data = "test data"

    with self.assertRaises(NotFoundInCache):
        link_embed_data_from_cache(url)

    with self.settings(CACHES=TEST_CACHES):
        key = preview_url_cache_key(url)
        cache_set(key, link_embed_data, "database")
        self.assertEqual(link_embed_data, link_embed_data_from_cache(url))
def _send_message_with_test_org_url(
    self, sender: UserProfile, queue_should_run: bool = True, relative_url: bool = False
) -> Message:
    """Send ``http://test.org/`` as a personal message and process its preview.

    Args:
        sender: The user who sends the message to "cordelia".
        queue_should_run: If true, assert that exactly one ``embed_links``
            event was published and run it through ``FetchLinksEmbedData``.
            If false, assert that nothing was published and return the
            message right away.
        relative_url: Forwarded unchanged to ``self.create_mock_response``.

    Returns:
        The message, freshly loaded from the database. When the queue ran,
        it is loaded after the worker has consumed the event.
    """
    url = "http://test.org/"
    # Ensure the cache for this is empty
    cache_delete(preview_url_cache_key(url))
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as patched:
        msg_id = self.send_personal_message(
            sender,
            self.example_user("cordelia"),
            content=url,
        )
        if queue_should_run:
            patched.assert_called_once()
            queue = patched.call_args[0][0]
            self.assertEqual(queue, "embed_links")
            event = patched.call_args[0][1]
        else:
            patched.assert_not_called()
            # If nothing was put in the queue, we don't need to
            # run the queue processor or any of the following code
            return Message.objects.select_related("sender").get(id=msg_id)

    # Verify the initial message doesn't have the embedded links rendered
    msg = Message.objects.select_related("sender").get(id=msg_id)
    self.assertNotIn(f'<a href="{url}" title="The Rock">The Rock</a>', msg.rendered_content)

    self.create_mock_response(url, relative_url=relative_url)

    # Run the queue processor to potentially rerender things
    with self.settings(TEST_SUITE=False):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
            info_logs.output[0],
        )

    # Reload so the caller sees whatever the queue processor wrote.
    return Message.objects.select_related("sender").get(id=msg_id)
def test_valid_content_type_error_get_data(self) -> None:
    """Check the outcome when fetching the page raises ``ConnectionError``.

    ``get_oembed_data`` is patched to return ``None`` and
    ``valid_content_type`` to return ``True``, while the mocked response
    body is a ``ConnectionError`` instance. After the worker consumes the
    event, no cache entry exists for the URL and the message renders as a
    plain link.
    """
    url = "http://test.org/"
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish"):
        msg_id = self.send_personal_message(
            self.example_user("hamlet"),
            self.example_user("cordelia"),
            content=url,
        )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    event = {
        "message_id": msg_id,
        "urls": [url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": url,
    }

    self.create_mock_response(url, body=ConnectionError())
    # Both patches cover the same region, so they share one `with`.
    with mock.patch(
        "zerver.lib.url_preview.preview.get_oembed_data",
        side_effect=lambda *args, **kwargs: None,
    ), mock.patch(
        "zerver.lib.url_preview.preview.valid_content_type",
        side_effect=lambda k: True,
    ):
        with self.settings(TEST_SUITE=False):
            with self.assertLogs(level="INFO") as info_logs:
                FetchLinksEmbedData().consume(event)
            # assertIn reports both operands on failure, unlike assertTrue(a in b).
            self.assertIn(
                "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
                info_logs.output[0],
            )

        # This did not get cached -- hence the lack of [0] on the cache_get
        cached_data = cache_get(preview_url_cache_key(url))
        self.assertIsNone(cached_data)

    msg.refresh_from_db()
    self.assertEqual(
        '<p><a href="http://test.org/">http://test.org/</a></p>', msg.rendered_content
    )
def test_safe_oembed_html_url(self) -> None:
    """Check that oEmbed video HTML is cached and rendered HTML-escaped.

    ``get_oembed_data`` is patched to return a ``UrlOEmbedData`` of type
    ``"video"`` whose ``html`` is an iframe. After the worker consumes the
    event, the cache must hold exactly that object, and the rendered
    message must carry the escaped HTML in a ``data-id`` attribute.
    """
    url = "http://test.org/"
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish"):
        msg_id = self.send_personal_message(
            self.example_user("hamlet"),
            self.example_user("cordelia"),
            content=url,
        )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    event = {
        "message_id": msg_id,
        "urls": [url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": url,
    }

    mocked_data = UrlOEmbedData(
        html=f'<iframe src="{url}"></iframe>',
        type="video",
        image=f"{url}/image.png",
    )
    self.create_mock_response(url)
    with self.settings(TEST_SUITE=False):
        with self.assertLogs(level="INFO") as info_logs:
            # Replace the oEmbed lookup so the worker sees mocked_data.
            with mock.patch(
                "zerver.lib.url_preview.preview.get_oembed_data",
                lambda *args, **kwargs: mocked_data,
            ):
                FetchLinksEmbedData().consume(event)
                cached_data = cache_get(preview_url_cache_key(url))[0]
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
            info_logs.output[0],
        )

    self.assertEqual(cached_data, mocked_data)
    msg.refresh_from_db()
    self.assertIn(f'a data-id="{escape(mocked_data.html)}"', msg.rendered_content)