def test_get_local_url(self):
    """Check how uu.get_local_url maps remote URLs under a local base path.

    Three cases are exercised: a URL naming a file, a URL ending in a
    slash, and the bare site root.
    """
    # Each entry is (local base path, remote URL, expected local URL).
    cases = (
        ('/home/bart/doc', 'http://doc.com/foo/bar.txt',
         '/home/bart/doc/foo/bar.txt'),
        ('/home/bart/doc/', 'http://doc.com/foo/bar/',
         '/home/bart/doc/foo/bar/bar__root.html'),
        ('/home/bart/doc', 'http://doc.com/',
         '/home/bart/doc/root__root.html'),
    )
    for base_path, remote_url, expected in cases:
        self.assertEqual(uu.get_local_url(base_path, remote_url), expected)
def page_error(self, input_url, pages):
    """Record a page that could not be downloaded.

    The failure is logged at INFO level and a placeholder DocumentPage
    (no tree, empty link list) is stored in ``pages`` under the local URL
    computed for ``input_url``.

    Args:
        input_url: URL of the page whose download failed.
        pages: Mapping keyed by local URL; it is mutated in place.
    """
    # The key is derived from the URL after get_url_without_hash, so the
    # entry is stored under the hash-less form of the URL.
    local_url = get_local_url(self.output_url,
                              get_url_without_hash(input_url))
    self.logger.info(
        'This page could not be downloaded: {0} in {1}'.format(
            input_url, local_url))
    # Fix: the original passed the builtin ``input`` (a function object)
    # here instead of the failing URL.
    # NOTE(review): assumes DocumentPage's first argument is the page URL;
    # confirm against its definition.
    error_page = DocumentPage(input_url, None, [])
    pages[local_url] = error_page
def make_copy(self, url_to_copy, binary=False):
    """Download ``url_to_copy`` to its local location and return that URL.

    Args:
        url_to_copy: Remote URL of the resource to copy.
        binary: Forwarded unchanged to ``download_file``.

    Returns:
        The local destination URL computed by ``get_local_url``.

    Raises:
        Exception: If the destination path could not be prepared.
    """
    destination_url = get_local_url(self.output_url, url_to_copy)
    try:
        ensure_path_exists(destination_url)
    except Exception:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit propagate instead of being reported as a copy failure.
        # ``raise ... from`` is avoided because the Python version this
        # file targets is not visible from this block.
        raise Exception('Could not make copy of {0} in {1}'.format(
            url_to_copy, destination_url))
    download_file(url_to_copy, get_path_from_url(destination_url),
                  force=False, binary=binary)
    return destination_url
def process_page_links(self, tree, local_url, url):
    """Build a DocumentLink for every link tag in ``tree`` that has an href.

    Args:
        tree: Parsed page handed to ``self.links`` to obtain the link tags.
        local_url: Not used by this method.
        url: URL of the page; each href is joined against it.

    Returns:
        A list of DocumentLink objects, one per tag carrying an ``href``
        attribute, in the order the tags were yielded.
    """
    collected = []
    for tag in self.links(tree):
        attrs = tag.attrib
        # Tags without an href contribute nothing.
        if 'href' not in attrs:
            continue
        target_url = get_url_without_hash(
            urlparse.urljoin(url, attrs['href']))
        local_target = get_sanitized_url(
            get_local_url(self.output_url, target_url))
        collected.append(DocumentLink(target_url, local_target))
    return collected
def test_get_local_url(self):
    """Verify the local URLs produced by uu.get_local_url.

    Checks a URL naming a file, a URL with a trailing slash, and the bare
    site root, each resolved against a local base path.
    """
    resolve = uu.get_local_url

    # A URL that names a file keeps its path under the base.
    file_result = resolve("/home/bart/doc", "http://doc.com/foo/bar.txt")
    self.assertEqual(file_result, "/home/bart/doc/foo/bar.txt")

    # A URL ending in a slash is expected to map to a "<name>__root.html" file.
    dir_result = resolve("/home/bart/doc/", "http://doc.com/foo/bar/")
    self.assertEqual(dir_result, "/home/bart/doc/foo/bar/bar__root.html")

    # The site root is expected to map to "root__root.html" under the base.
    root_result = resolve("/home/bart/doc", "http://doc.com/")
    self.assertEqual(root_result, "/home/bart/doc/root__root.html")