def test_web_processor_parse_url(self):
    '''Check which inputs ``WebProcessorSession.parse_url`` accepts.

    A complete HTTP URL must give a truthy result.  A scheme with no
    host, an empty string, and the scheme-less ``'.xn--hda.com/'`` must
    each give a falsy result.
    '''
    encoding = 'utf-8'

    accepted = WebProcessorSession.parse_url('http://example.com', encoding)
    self.assertTrue(accepted)

    # Every one of these inputs is expected to be rejected.
    for rejected_url in ('http://', '', '.xn--hda.com/'):
        self.assertFalse(
            WebProcessorSession.parse_url(rejected_url, encoding)
        )
def test_web_processor_parse_url(self):
    '''Check which inputs ``WebProcessorSession.parse_url`` accepts.

    A complete HTTP URL must give a truthy result; a scheme with no
    host and an empty string must each give a falsy result.
    '''
    # Call the public ``parse_url`` name rather than the
    # underscore-prefixed ``_parse_url``, so the test exercises the same
    # entry point that non-test callers use.
    self.assertTrue(
        WebProcessorSession.parse_url('http://example.com', 'utf-8')
    )
    self.assertFalse(
        WebProcessorSession.parse_url('http://', 'utf-8')
    )
    self.assertFalse(
        WebProcessorSession.parse_url('', 'utf-8')
    )
def _add_hooked_url(self, url_item, new_url_dict):
    '''Add the URL described by a script-supplied ``dict`` to ``url_item``.

    The ``'url'`` entry is looked up with a key converted through
    ``self.to_script_native_type``; the ``'link_type'``, ``'inline'``,
    ``'post_data'`` and ``'replace'`` entries are read through
    ``self.get_from_native_dict``.

    The URL is parsed with ``WebProcessorSession.parse_url`` as UTF-8.
    If parsing gives a falsy result, nothing is added.  Otherwise, when
    ``replace`` is truthy the URL is first removed from
    ``url_item.url_table``, and the parsed URL is then added as an inline
    URL when ``inline`` is truthy or as a linked URL otherwise.
    '''
    url = new_url_dict[self.to_script_native_type('url')]

    read_option = self.get_from_native_dict
    link_type = read_option(new_url_dict, 'link_type')
    is_inline = read_option(new_url_dict, 'inline')
    post_data = read_option(new_url_dict, 'post_data')
    should_replace = read_option(new_url_dict, 'replace')

    assert url

    # FIXME: resolve circular imports
    from wpull.processor import WebProcessorSession

    parsed_url = WebProcessorSession.parse_url(url, 'utf-8')

    if parsed_url:
        add_options = {'link_type': link_type, 'post_data': post_data}

        if should_replace:
            url_item.url_table.remove([url])

        if is_inline:
            add_urls = url_item.add_inline_url_infos
        else:
            add_urls = url_item.add_linked_url_infos

        add_urls([parsed_url], **add_options)