Example #1
def parse_netloc(netloc):
    # type: (str) -> Tuple[str, Optional[int]]
    """
    Return the host-port pair from a netloc.
    """
    url = build_url_from_netloc(netloc)
    parsed = urllib_parse.urlparse(url)
    return parsed.hostname, parsed.port
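To see the fields this helper relies on, urllib.parse.urlparse (the Python 3 stdlib module behind the urllib_parse alias used above) exposes hostname and port directly once the netloc is embedded in a full URL; the URL below is only illustrative:

from urllib.parse import urlparse

parsed = urlparse("https://user:secret@Example.COM:8080/simple/")
print(parsed.hostname)  # 'example.com' -- lowercased, credentials stripped
print(parsed.port)      # 8080 (an int); None when no port is given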
Example #2
def _get_html_page(link, session=None):
    # type: (Link, Optional[PipSession]) -> Optional[HTMLPage]
    if session is None:
        raise TypeError(
            "_get_html_page() missing 1 required keyword argument: 'session'")

    url = link.url.split('#', 1)[0]

    # Check for VCS schemes that do not support lookup as web pages.
    vcs_scheme = _match_vcs_scheme(url)
    if vcs_scheme:
        logger.debug('Cannot look at %s URL %s', vcs_scheme, link)
        return None

    # Tack index.html onto file:// URLs that point to directories
    scheme, _, path, _, _, _ = urllib_parse.urlparse(url)
    if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))):
        # add trailing slash if not present so urljoin doesn't trim
        # final segment
        if not url.endswith('/'):
            url += '/'
        url = urllib_parse.urljoin(url, 'index.html')
        logger.debug(' file: URL is directory, getting %s', url)

    try:
        resp = _get_html_response(url, session=session)
    except _NotHTTP:
        logger.debug(
            'Skipping page %s because it looks like an archive, and cannot '
            'be checked by HEAD.',
            link,
        )
    except _NotHTML as exc:
        logger.debug(
            'Skipping page %s because the %s request got Content-Type: %s',
            link,
            exc.request_desc,
            exc.content_type,
        )
    except HTTPError as exc:
        _handle_get_page_fail(link, exc)
    except RetryError as exc:
        _handle_get_page_fail(link, exc)
    except SSLError as exc:
        reason = "There was a problem confirming the ssl certificate: "
        reason += str(exc)
        _handle_get_page_fail(link, reason, meth=logger.info)
    except requests.ConnectionError as exc:
        _handle_get_page_fail(link, "connection error: {}".format(exc))
    except requests.Timeout:
        _handle_get_page_fail(link, "timed out")
    else:
        return _make_html_page(resp,
                               cache_link_parsing=link.cache_link_parsing)
    return None
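The trailing-slash handling above exists because urljoin discards the last path segment of a base URL that lacks a trailing slash; a minimal illustration with made-up paths:

from urllib.parse import urljoin

print(urljoin("file:///tmp/wheels", "index.html"))   # file:///tmp/index.html
print(urljoin("file:///tmp/wheels/", "index.html"))  # file:///tmp/wheels/index.html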
Example #3
def _clean_link(url):
    # type: (str) -> str
    """
    Make sure a link is fully quoted.
    For example, if ' ' occurs in the URL, it will be replaced with "%20",
    while characters that are already percent-encoded are not quoted again.
    """
    # Split the URL into parts according to the general structure
    # `scheme://netloc/path;parameters?query#fragment`.
    result = urllib_parse.urlparse(url)
    # If the netloc is empty, then the URL refers to a local filesystem path.
    is_local_path = not result.netloc
    path = _clean_url_path(result.path, is_local_path=is_local_path)
    return urllib_parse.urlunparse(result._replace(path=path))
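_clean_url_path is a pip-internal helper that is not shown here; a rough stand-in for the idea (unquote each path segment before re-quoting it, so existing escapes such as "%20" are not encoded twice) could look like this:

from urllib.parse import quote, unquote, urlparse, urlunparse

def clean_path_sketch(path):
    # Hypothetical helper: re-quote each segment; unquoting first keeps
    # already-encoded sequences like "%20" from turning into "%2520".
    return "/".join(quote(unquote(part)) for part in path.split("/"))

url = "https://example.com/some dir/file%20name.whl"
result = urlparse(url)
print(urlunparse(result._replace(path=clean_path_sketch(result.path))))
# https://example.com/some%20dir/file%20name.whl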
Example #4
    def handle_401(self, resp, **kwargs):
        # We only care about 401 responses, anything else we want to just
        #   pass through the actual response
        if resp.status_code != 401:
            return resp

        # We are not able to prompt the user so simply return the response
        if not self.prompting:
            return resp

        parsed = urllib_parse.urlparse(resp.url)

        # Prompt the user for a new username and password
        username, password, save = self._prompt_for_password(parsed.netloc)

        # Store the new username and password to use for future requests
        self._credentials_to_save = None
        if username is not None and password is not None:
            self.passwords[parsed.netloc] = (username, password)

            # Prompt to save the password to keyring
            if save and self._should_save_password_to_keyring():
                self._credentials_to_save = (parsed.netloc, username, password)

        # Consume content and release the original connection to allow our new
        #   request to reuse the same one.
        resp.content
        resp.raw.release_conn()

        # Add our new username and password to the request
        req = HTTPBasicAuth(username or "", password or "")(resp.request)
        req.register_hook("response", self.warn_on_401)

        # On successful request, save the credentials that were used to
        # keyring. (Note that if the user responded "no" above, this member
        # is not set and nothing will be saved.)
        if self._credentials_to_save:
            req.register_hook("response", self.save_credentials)

        # Send our new request
        new_resp = resp.connection.send(req, **kwargs)
        new_resp.history.append(resp)

        return new_resp
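The retry step at the end can be sketched with plain requests outside of any hook: an auth object such as HTTPBasicAuth is a callable that attaches an Authorization header to a prepared request, which a Session can then re-send (the URL and credentials below are placeholders):

import requests
from requests.auth import HTTPBasicAuth

with requests.Session() as session:
    resp = session.get("https://example.com/protected")  # placeholder URL
    if resp.status_code == 401:
        # Attach credentials to the original prepared request and re-send it.
        retry_req = HTTPBasicAuth("user", "secret")(resp.request)
        resp = session.send(retry_req)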
Example #5
    def create(
            cls,
            find_links,  # type: List[str]
            index_urls,  # type: List[str]
    ):
        # type: (...) -> SearchScope
        """
        Create a SearchScope object after normalizing the `find_links`.
        """
        # Build find_links. If an argument starts with ~, it may be
        # a local file relative to a home directory. So try normalizing
        # it and if it exists, use the normalized version.
        # This is deliberately conservative - it might be fine just to
        # blindly normalize anything starting with a ~...
        built_find_links = []  # type: List[str]
        for link in find_links:
            if link.startswith('~'):
                new_link = normalize_path(link)
                if os.path.exists(new_link):
                    link = new_link
            built_find_links.append(link)

        # If we don't have TLS enabled, then WARN if anyplace we're looking
        # relies on TLS.
        if not has_tls():
            for link in itertools.chain(index_urls, built_find_links):
                parsed = urllib_parse.urlparse(link)
                if parsed.scheme == 'https':
                    logger.warning(
                        'pip is configured with locations that require '
                        'TLS/SSL, however the ssl module in Python is not '
                        'available.')
                    break

        return cls(
            find_links=built_find_links,
            index_urls=index_urls,
        )
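normalize_path and has_tls are pip internals; for the '~' handling, expanding the user directory and resolving the result is roughly equivalent (a sketch under that assumption, not pip's exact implementation):

import os

def normalize_path_sketch(path):
    # Approximation of normalize_path(): expand "~" and resolve the result
    # to a canonical absolute path.
    return os.path.realpath(os.path.expanduser(path))

built_find_links = []
for link in ["~/wheels", "https://example.com/links"]:  # made-up inputs
    if link.startswith("~"):
        candidate = normalize_path_sketch(link)
        if os.path.exists(candidate):
            link = candidate
    built_find_links.append(link)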
Example #6
    def __init__(self, requirement_string):
        # type: (str) -> None
        try:
            req = REQUIREMENT.parseString(requirement_string)
        except ParseException as e:
            raise InvalidRequirement('Parse error at "{0!r}": {1}'.format(
                requirement_string[e.loc:e.loc + 8], e.msg))

        self.name = req.name
        if req.url:
            parsed_url = urlparse.urlparse(req.url)
            if parsed_url.scheme == "file":
                if urlparse.urlunparse(parsed_url) != req.url:
                    raise InvalidRequirement("Invalid URL given")
            elif not (parsed_url.scheme and parsed_url.netloc) or (
                    not parsed_url.scheme and not parsed_url.netloc):
                raise InvalidRequirement("Invalid URL: {0}".format(req.url))
            self.url = req.url
        else:
            self.url = None
        self.extras = set(req.extras.asList() if req.extras else [])
        self.specifier = SpecifierSet(req.specifier)
        self.marker = req.marker if req.marker else None
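This constructor appears to be the Requirement class from the packaging project (which pip and setuptools also vendor); assuming the standalone packaging distribution is installed, parsing a PEP 508 string looks like:

from packaging.requirements import Requirement

req = Requirement('requests[security] >= 2.8.1, == 2.8.* ; python_version < "3.4"')
print(req.name)       # requests
print(req.extras)     # {'security'}
print(req.specifier)  # ==2.8.*,>=2.8.1
print(req.marker)     # python_version < "3.4"
print(req.url)        # None

# A URL-based requirement (illustrative URL) is stored on .url instead:
print(Requirement("pip @ https://example.com/pip-20.0.tar.gz").url)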
Example #7
    def __init__(self, index_url, session, use_datetime=False):
        xmlrpc_client.Transport.__init__(self, use_datetime)
        index_parts = urllib_parse.urlparse(index_url)
        self._scheme = index_parts.scheme
        self._session = session
Example #8
    def is_secure_origin(self, location):
        # type: (Link) -> bool
        # Determine if this url used a secure transport mechanism
        parsed = urllib_parse.urlparse(str(location))
        origin_protocol, origin_host, origin_port = (
            parsed.scheme,
            parsed.hostname,
            parsed.port,
        )

        # Don't count the repository type as part of the protocol when
        # comparing: in cases such as "git+ssh", only use "ssh" (i.e., only
        # verify against the last scheme).
        origin_protocol = origin_protocol.rsplit('+', 1)[-1]

        # Determine if our origin is a secure origin by looking through our
        # hardcoded list of secure origins, as well as any additional ones
        # configured on this PackageFinder instance.
        for secure_origin in self.iter_secure_origins():
            secure_protocol, secure_host, secure_port = secure_origin
            if origin_protocol != secure_protocol and secure_protocol != "*":
                continue

            try:
                addr = ipaddress.ip_address(None if origin_host is None else
                                            six.ensure_text(origin_host))
                network = ipaddress.ip_network(six.ensure_text(secure_host))
            except ValueError:
                # We don't have both a valid address and a valid network, so
                # we'll check this origin against hostnames.
                if (origin_host and origin_host.lower() != secure_host.lower()
                        and secure_host != "*"):
                    continue
            else:
                # We have a valid address and network, so see if the address
                # is contained within the network.
                if addr not in network:
                    continue

            # Check to see if the port matches.
            if (origin_port != secure_port and secure_port != "*"
                    and secure_port is not None):
                continue

            # If we've gotten here, then this origin matches the current
            # secure origin and we should return True
            return True

        # If we've gotten to this point, then the origin isn't secure and we
        # will not accept it as a valid location to search. We will however
        # log a warning that we are ignoring it.
        logger.warning(
            "The repository located at %s is not a trusted or secure host and "
            "is being ignored. If this repository is available via HTTPS we "
            "recommend you use HTTPS instead, otherwise you may silence "
            "this warning and allow it anyway with '--trusted-host %s'.",
            origin_host,
            origin_host,
        )

        return False
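The address-versus-hostname fallback in the try/except block above can be isolated into a small standalone check (the helper name is made up):

import ipaddress

def host_matches(origin_host, secure_host):
    # If both sides parse as an IP address / network, use a containment
    # check; otherwise fall back to a case-insensitive hostname or "*" match.
    try:
        addr = ipaddress.ip_address(origin_host)
        network = ipaddress.ip_network(secure_host)
    except ValueError:
        return secure_host == "*" or origin_host.lower() == secure_host.lower()
    return addr in network

print(host_matches("127.0.0.1", "127.0.0.0/8"))  # True
print(host_matches("PyPI.org", "pypi.org"))      # True
print(host_matches("example.com", "pypi.org"))   # False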