Exemplo n.º 1
0
def check_url(url, expected):
    """
    Verify that a URL matches an expected one, comparing a component at a time.

    The expected value is parsed with 'file' as default scheme; when its own scheme is
    not listed in `uses_relative` it is first passed through `add_leading_slash`.
    Components are compared in this order: scheme, netloc, query, fragment, path. The
    first difference found raises the corresponding error, so at most one is reported.

    :param url: the URL to check.
    :param expected: the URL (or path) that `url` is expected to match.
    :raises URLSchemeMismatchError: if the schemes differ.
    :raises URLNetLocationMismatchError: if the netloc parts differ.
    :raises URLQueryMismatchError: if the query parts differ.
    :raises URLFragmentMismatchError: if the fragment parts differ.
    :raises URLPathMismatchError: if the paths differ.
    """
    actual = urlsplit(url)

    expected_scheme = urlsplit(expected).scheme
    if expected_scheme not in uses_relative:
        expected = add_leading_slash(expected)
    wanted = urlsplit(expected, scheme='file')

    if actual.scheme != wanted.scheme:
        raise URLSchemeMismatchError("Schemes differ.")
    if actual.netloc != wanted.netloc:
        raise URLNetLocationMismatchError("Netloc parts differ.")
    if actual.query != wanted.query:
        raise URLQueryMismatchError("Query parts differ.")
    if actual.fragment != wanted.fragment:
        raise URLFragmentMismatchError("Fragment parts differ.")

    # Paths are compared as pure path objects rather than as raw strings; Windows
    # semantics are used as soon as either side looks like a Windows path.
    windows_style = is_windows_path(actual.path) or is_windows_path(wanted.path)
    if windows_style:
        actual_path = PureWindowsPath(actual.path)
        wanted_path = PureWindowsPath(add_leading_slash(wanted.path))
    else:
        actual_path = PurePath(actual.path)
        wanted_path = PurePath(wanted.path)

    if actual_path != wanted_path:
        raise URLPathMismatchError("Paths differ.")
Exemplo n.º 2
0
    def check_url(self, url, expected):
        """
        Assert that a URL matches an expected one, comparing a component at a time.

        The expected value is parsed with 'file' as default scheme; when its own scheme
        is not listed in `uses_relative` it is first passed through `add_leading_slash`.
        Scheme, netloc, query and fragment are compared as strings, while paths are
        compared as pure path objects (Windows flavour if either side looks like a
        Windows path).

        :param url: the URL to check.
        :param expected: the URL (or path) that `url` is expected to match.
        """
        # FIXME: temporary diagnostics. The application-path helpers below are called
        # only so that their results can be printed next to the plain urlsplit()
        # result; their output takes no part in the assertions that follow.

        url_parts = urlsplit(url)

        url_parts2 = normapppathx(url, appsplit=True)
        url_parts3 = splitapppathx(url, appsplit=True)

        print("4TEST:url_parts  = " + str(url_parts))
        print("4TEST:url_parts2 = " + str(url_parts2))
        # The label now matches the value printed (it used to say "url_parts2").
        print("4TEST:url_parts3 = " + str(url_parts3))

        if urlsplit(expected).scheme not in uses_relative:
            expected = add_leading_slash(expected)
        expected_parts = urlsplit(expected, scheme='file')

        self.assertEqual(url_parts.scheme, expected_parts.scheme,
                         "Schemes differ.")
        self.assertEqual(url_parts.netloc, expected_parts.netloc,
                         "Netloc parts differ.")
        self.assertEqual(url_parts.query, expected_parts.query,
                         "Query parts differ.")
        self.assertEqual(url_parts.fragment, expected_parts.fragment,
                         "Fragment parts differ.")

        if is_windows_path(url_parts.path) or is_windows_path(
                expected_parts.path):
            path = PureWindowsPath(url_parts.path)
            expected_path = PureWindowsPath(
                add_leading_slash(expected_parts.path))
        else:
            path = PurePath(url_parts.path)
            expected_path = PurePath(expected_parts.path)
        self.assertEqual(path, expected_path, "Paths differ.")
Exemplo n.º 3
0
    def load(self):
        """
        Reads the XML text from the resource URL and stores it in `self._text`.

        Nothing is done when the instance has no URL (it was built from an Element or
        from a text source). The data is interpreted as UTF-8 first, with ISO-8859-1
        as a fallback when that raises a UnicodeDecodeError.

        :raises XMLSchemaOSError: if reading from the opened resource fails.
        """
        if self._url is None:
            return  # No URL: built from an Element or a text source, nothing to load

        stream = self.open()
        try:
            raw = stream.read()
        except (OSError, IOError) as err:
            raise XMLSchemaOSError("cannot load data from %r: %s" % (self._url, err))
        finally:
            # The resource is closed whether or not the read succeeded.
            stream.close()

        try:
            if PY3:
                self._text = raw.decode('utf-8')
            else:
                self._text = raw.encode('utf-8')
        except UnicodeDecodeError:
            if not PY3:
                # Python 2 fallback: read the file again from the URL's path component.
                with codecs.open(urlsplit(self._url).path, mode='rb', encoding='iso-8859-1') as f:
                    self._text = f.read().encode('iso-8859-1')
            else:
                self._text = raw.decode('iso-8859-1')
Exemplo n.º 4
0
def normalize_url(url, base_url=None, keep_relative=False):
    """
    Normalizes a URL, optionally resolving it against a base URL. A missing scheme
    defaults to 'file' and backslashes are converted to slashes. When both the URL and
    the base are local (empty or 'file' scheme) the paths are combined with
    os.path.join instead of urljoin.

    :param url: a relative or absolute URL.
    :param base_url: the base URL used to resolve the `url` argument. To keep \
    "os.path.join" and "urljoin" consistent, a trailing '/' is appended to a non-empty \
    base path.
    :param keep_relative: if `True`, relative file paths are kept relative, although \
    the result is then not strictly conformant to the URL format specification.
    :return: A normalized URL.
    """
    local_schemes = ('', 'file')

    def add_trailing_slash(parts):
        scheme_, netloc_, path_, query_, fragment_ = parts
        if path_ and not path_.endswith('/'):
            path_ += '/'
        return urlunsplit((scheme_, netloc_, path_, query_, fragment_))

    if base_url is not None:
        raw_base = urlsplit(base_url.replace('\\', '/'))
        base_url = add_trailing_slash(raw_base)
        if raw_base.scheme in uses_relative:
            base = urlsplit(base_url)
        else:
            # Not a known URL scheme: the whole base is wrapped as a file URL.
            base = urlsplit('file:///{}'.format(base_url))

        if base.scheme not in local_schemes:
            url = urljoin(base_url, url)
        else:
            # Local base: os.path.join is used in place of urljoin
            ref = urlsplit(url)
            if ref.scheme not in local_schemes:
                url = urljoin(base_url, url)
            elif not (ref.netloc and ref.netloc != base.netloc):
                # Paths are joined only if the reference has no host part or has the
                # same host part of the base; otherwise the URL is left unchanged.
                joined = os.path.normpath(os.path.join(base.path, ref.path))
                url = urlunsplit(('', base.netloc, joined, ref.query, ref.fragment))

    target = urlsplit(url.replace('\\', '/'), scheme='file')
    scheme, netloc, path, query, fragment = target

    if scheme not in uses_relative:
        return 'file:///{}'.format(target.geturl())  # Eg. k:/Python/lib/....
    if scheme != 'file':
        return urlunsplit((scheme, netloc, pathname2url(path), query, fragment))
    if os.path.isabs(path):
        return target.geturl()
    if keep_relative:
        # urlunsplit can't be used with a scheme here because it converts relative
        # paths to absolute ones, so the 'file:' prefix is added by hand.
        return 'file:{}'.format(urlunsplit(('', netloc, path, query, fragment)))
    return urlunsplit((scheme, netloc, os.path.abspath(path), query, fragment))
Exemplo n.º 5
0
def url_path_is_file(url):
    """
    Tells whether the path component of a URL refers to an existing regular file.

    Only the path component is considered: scheme and host are ignored and the check
    is always made with os.path.isfile. The path is tried verbatim first; if that
    fails it is tried again with percent-escapes decoded (e.g. '%20' as a space), so
    that properly encoded file URLs are recognized too.

    :param url: a URL or a plain filesystem path.
    :return: `True` if the path component points to an existing file, `False` otherwise.
    """
    try:
        from urllib.parse import unquote
    except ImportError:  # Python 2
        from urllib import unquote

    path = urlsplit(url).path
    if os.path.isfile(path):
        return True

    # Fallback for percent-encoded URLs; skipped when decoding changes nothing.
    decoded = unquote(path)
    return decoded != path and os.path.isfile(decoded)
Exemplo n.º 6
0
def url_path_is_directory(url):
    """
    Tells whether the path component of a URL refers to an existing directory.

    Only the path component is considered: scheme and host are ignored and the check
    is always made with os.path.isdir. The path is tried verbatim first; if that
    fails it is tried again with percent-escapes decoded (e.g. '%20' as a space), so
    that properly encoded file URLs are recognized too.

    :param url: a URL or a plain filesystem path.
    :return: `True` if the path component points to an existing directory, `False` otherwise.
    """
    try:
        from urllib.parse import unquote
    except ImportError:  # Python 2
        from urllib import unquote

    path = urlsplit(url).path
    if os.path.isdir(path):
        return True

    # Fallback for percent-encoded URLs; skipped when decoding changes nothing.
    decoded = unquote(path)
    return decoded != path and os.path.isdir(decoded)
Exemplo n.º 7
0
def is_remote_url(url):
    """
    Tells whether a URL has a scheme other than the empty one or 'file'.

    :param url: a URL string, or `None`.
    :return: `False` if `url` is `None` or its scheme is empty or 'file', `True` otherwise.
    """
    if url is None:
        return False
    return urlsplit(url).scheme not in ('', 'file')