def test_is_external_no_port():
    """is_external raises ValueError when only one of the URLs has a port.

    Both orderings are checked: port missing on the URL, then port missing
    on the prefix.
    """
    mismatched_pairs = (
        ('http://localhost/a/', 'http://localhost:80/a/'),
        ('http://localhost:80/a/', 'http://localhost/a/'),
    )
    for url_text, prefix_text in mismatched_pairs:
        # Parsing happens inside the context manager, as in the
        # straight-line form of this test.
        with pytest.raises(ValueError):
            is_external(url_parse(url_text), url_parse(prefix_text))
def add_task(self, url: URL, *, external_ok: bool = False, reason: str = None) -> Optional[Task]:
    """Register `url` to be frozen and return the task responsible for it.

    If a task for the same output path already exists in any of the task
    queues, the URL is attached to that task; otherwise a new task is
    created and stored in `self.pending_tasks`.

    Parameters:
        url: the URL to freeze.
        external_ok: when true, an external URL is ignored and None is
            returned instead of raising.
        reason: optional note added to the task's `reasons` set.

    Returns the task, or None if no task was added (external URL with
    `external_ok` set).

    Raises ExternalURLError for an external URL when `external_ok` is false.
    """
    if is_external(url, self.prefix):
        if not external_ok:
            raise ExternalURLError(f'Unexpected external URL: {url}')
        return None

    path = get_path_from_url(self.prefix, url, self.url_to_path)

    # First queue (in iteration order) that already holds this path, if any.
    owning_queue = next(
        (queue for queue in self.task_queues.values() if path in queue),
        None,
    )
    if owning_queue is None:
        # No existing task for this path: start a new pending one.
        task = Task(path, {url}, self)
        self.pending_tasks[path] = task
    else:
        task = owning_queue[path]
        task.urls.add(url)

    if reason:
        task.reasons.add(reason)
    return task
def url_to_filename(self, parsed_url):
    """Map a parsed URL to the file path the page is frozen to.

    Parameters:
        parsed_url
            Parsed URL
            (eg. url_parse('http://example.com:8000/foo/second.html'))
            to convert to a filename

    The prefix path is stripped from the URL path, a path ending in '/'
    gets 'index.html' appended, and the encoded result is joined onto
    `self.base_path`.

    Raises ValueError if the URL is external to `self.prefix`.
    """
    if is_external(parsed_url, self.prefix):
        raise ValueError(f'external url {parsed_url}')

    relative = parsed_url.path
    prefix_path = self.prefix.path
    if relative.startswith(prefix_path):
        relative = '/' + relative[len(prefix_path):]

    if relative.endswith('/'):
        # Directory-style URLs are stored as an index page.
        relative += 'index.html'

    encoded = encode_file_path(relative)
    return self.base_path / encoded.lstrip('/')
def get_path_from_url(prefix, url, url_to_path):
    """Return the relative output path for `url` as a PurePosixPath.

    The prefix path is stripped from the URL path (when present) and the
    remainder is passed to the `url_to_path` callable; its result is
    converted to a PurePosixPath and validated.

    Raises ValueError if `url` is external to `prefix`, or if the resulting
    path is absolute or contains a '..' segment.
    """
    if is_external(url, prefix):
        raise ValueError(f'external url {url}')

    relative = url.path
    prefix_path = prefix.path
    if relative.startswith(prefix_path):
        relative = relative[len(prefix_path):]

    result = PurePosixPath(url_to_path(relative))

    if result.is_absolute():
        message = f"Path may not be absolute: {result}(from {url.to_url()})"
        raise ValueError(message)

    # Sanity check: PurePosixPath is expected to have collapsed '.' segments.
    assert '.' not in result.parts

    if '..' in result.parts:
        message = (
            f"Path may not contain /../ segment: {result}(from {url.to_url()})"
        )
        raise ValueError(message)

    return result
def test_is_external_positive():
    """A URL nested below the prefix path is not external."""
    url = url_parse('http://localhost:80/a/b/c')
    prefix = url_parse('http://localhost:80/a/')
    external = is_external(url, prefix)
    assert not external
def test_is_external_root_index():
    """A host URL without a trailing slash is not external to the root prefix."""
    url = url_parse('http://localhost:80')
    prefix = url_parse('http://localhost:80/')
    external = is_external(url, prefix)
    assert not external
def test_is_external_index():
    """The prefix path without its trailing slash counts as external."""
    url = url_parse('http://localhost:80/a')
    prefix = url_parse('http://localhost:80/a/')
    external = is_external(url, prefix)
    assert external
def test_is_external_same():
    """A URL identical to the prefix is not external."""
    url = url_parse('http://localhost:80/a/')
    prefix = url_parse('http://localhost:80/a/')
    external = is_external(url, prefix)
    assert not external
def test_is_external_negative_same_host():
    """A URL above the prefix path is external even on the same host."""
    url = url_parse('http://localhost:80/a/')
    prefix = url_parse('http://localhost:80/a/b/c/')
    external = is_external(url, prefix)
    assert external
def test_is_external_negative():
    """A URL on a different host than the prefix is external."""
    url = url_parse('http://example.com:80/a/')
    prefix = url_parse('http://localhost:80/a/b/c/')
    external = is_external(url, prefix)
    assert external