示例#1
0
def path_to_url(path):
    """
    Convert a path to a file: URL.  The path will be made absolute and have
    quoted path parts.
    """
    path = os.path.normpath(os.path.abspath(path))
    url = urllib_parse.urljoin('file:', urllib_request.pathname2url(path))
    return url
示例#2
0
 def links(self):
     """Yields all links in the page"""
     for anchor in self.parsed.findall(".//a"):
         if anchor.get("href"):
             href = anchor.get("href")
             url = self.clean_link(urllib_parse.urljoin(
                 self.base_url, href))
             pyrequire = anchor.get('data-requires-python')
             pyrequire = unescape(pyrequire) if pyrequire else None
             yield Link(url, self, requires_python=pyrequire)
示例#3
0
    def get_page(cls, link, skip_archives=True, session=None):
        if session is None:
            raise TypeError(
                "get_page() missing 1 required keyword argument: 'session'"
            )

        url = link.url
        url = url.split('#', 1)[0]

        # Check for VCS schemes that do not support lookup as web pages.
        from pip9.vcs import VcsSupport
        for scheme in VcsSupport.schemes:
            if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
                logger.debug('Cannot look at %s URL %s', scheme, link)
                return None

        try:
            if skip_archives:
                filename = link.filename
                for bad_ext in ARCHIVE_EXTENSIONS:
                    if filename.endswith(bad_ext):
                        content_type = cls._get_content_type(
                            url, session=session,
                        )
                        if content_type.lower().startswith('text/html'):
                            break
                        else:
                            logger.debug(
                                'Skipping page %s because of Content-Type: %s',
                                link,
                                content_type,
                            )
                            return

            logger.debug('Getting page %s', url)

            # Tack index.html onto file:// URLs that point to directories
            (scheme, netloc, path, params, query, fragment) = \
                urllib_parse.urlparse(url)
            if (scheme == 'file' and
                    os.path.isdir(urllib_request.url2pathname(path))):
                # add trailing slash if not present so urljoin doesn't trim
                # final segment
                if not url.endswith('/'):
                    url += '/'
                url = urllib_parse.urljoin(url, 'index.html')
                logger.debug(' file: URL is directory, getting %s', url)

            resp = session.get(
                url,
                headers={
                    "Accept": "text/html",
                    "Cache-Control": "max-age=600",
                },
            )
            resp.raise_for_status()

            # The check for archives above only works if the url ends with
            # something that looks like an archive. However that is not a
            # requirement of an url. Unless we issue a HEAD request on every
            # url we cannot know ahead of time for sure if something is HTML
            # or not. However we can check after we've downloaded it.
            content_type = resp.headers.get('Content-Type', 'unknown')
            if not content_type.lower().startswith("text/html"):
                logger.debug(
                    'Skipping page %s because of Content-Type: %s',
                    link,
                    content_type,
                )
                return

            inst = cls(resp.content, resp.url, resp.headers)
        except requests.HTTPError as exc:
            cls._handle_fail(link, exc, url)
        except SSLError as exc:
            reason = ("There was a problem confirming the ssl certificate: "
                      "%s" % exc)
            cls._handle_fail(link, reason, url, meth=logger.info)
        except requests.ConnectionError as exc:
            cls._handle_fail(link, "connection error: %s" % exc, url)
        except requests.Timeout:
            cls._handle_fail(link, "timed out", url)
        else:
            return inst
示例#4
0
 def url_to_path(self, path):
     return urllib_parse.urljoin(self.url, path)
示例#5
0
def process_line(line,
                 filename,
                 line_number,
                 finder=None,
                 comes_from=None,
                 options=None,
                 session=None,
                 wheel_cache=None,
                 constraint=False):
    """Process a single requirements line; This can result in creating/yielding
    requirements, or updating the finder.

    For lines that contain requirements, the only options that have an effect
    are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
    requirement. Other options from SUPPORTED_OPTIONS may be present, but are
    ignored.

    For lines that do not contain requirements, the only options that have an
    effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
    be present, but are ignored. These lines may contain multiple options
    (although our docs imply only one is supported), and all our parsed and
    affect the finder.

    :param constraint: If True, parsing a constraints file.
    :param options: OptionParser options that we may update
    """
    parser = build_parser()
    defaults = parser.get_default_values()
    defaults.index_url = None
    if finder:
        # `finder.format_control` will be updated during parsing
        defaults.format_control = finder.format_control
    args_str, options_str = break_args_options(line)
    if sys.version_info < (2, 7, 3):
        # Prior to 2.7.3, shlex cannot deal with unicode entries
        options_str = options_str.encode('utf8')
    opts, _ = parser.parse_args(shlex.split(options_str), defaults)

    # preserve for the nested code path
    line_comes_from = '%s %s (line %s)' % ('-c' if constraint else '-r',
                                           filename, line_number)

    # yield a line requirement
    if args_str:
        isolated = options.isolated_mode if options else False
        if options:
            cmdoptions.check_install_build_global(options, opts)
        # get the options that apply to requirements
        req_options = {}
        for dest in SUPPORTED_OPTIONS_REQ_DEST:
            if dest in opts.__dict__ and opts.__dict__[dest]:
                req_options[dest] = opts.__dict__[dest]
        yield InstallRequirement.from_line(args_str,
                                           line_comes_from,
                                           constraint=constraint,
                                           isolated=isolated,
                                           options=req_options,
                                           wheel_cache=wheel_cache)

    # yield an editable requirement
    elif opts.editables:
        isolated = options.isolated_mode if options else False
        default_vcs = options.default_vcs if options else None
        yield InstallRequirement.from_editable(opts.editables[0],
                                               comes_from=line_comes_from,
                                               constraint=constraint,
                                               default_vcs=default_vcs,
                                               isolated=isolated,
                                               wheel_cache=wheel_cache)

    # parse a nested requirements file
    elif opts.requirements or opts.constraints:
        if opts.requirements:
            req_path = opts.requirements[0]
            nested_constraint = False
        else:
            req_path = opts.constraints[0]
            nested_constraint = True
        # original file is over http
        if SCHEME_RE.search(filename):
            # do a url join so relative paths work
            req_path = urllib_parse.urljoin(filename, req_path)
        # original file and nested file are paths
        elif not SCHEME_RE.search(req_path):
            # do a join so relative paths work
            req_path = os.path.join(os.path.dirname(filename), req_path)
        # TODO: Why not use `comes_from='-r {} (line {})'` here as well?
        parser = parse_requirements(req_path,
                                    finder,
                                    comes_from,
                                    options,
                                    session,
                                    constraint=nested_constraint,
                                    wheel_cache=wheel_cache)
        for req in parser:
            yield req

    # percolate hash-checking option upward
    elif opts.require_hashes:
        options.require_hashes = opts.require_hashes

    # set finder options
    elif finder:
        if opts.allow_external:
            warnings.warn(
                "--allow-external has been deprecated and will be removed in "
                "the future. Due to changes in the repository protocol, it no "
                "longer has any effect.",
                RemovedInPip10Warning,
            )

        if opts.allow_all_external:
            warnings.warn(
                "--allow-all-external has been deprecated and will be removed "
                "in the future. Due to changes in the repository protocol, it "
                "no longer has any effect.",
                RemovedInPip10Warning,
            )

        if opts.allow_unverified:
            warnings.warn(
                "--allow-unverified has been deprecated and will be removed "
                "in the future. Due to changes in the repository protocol, it "
                "no longer has any effect.",
                RemovedInPip10Warning,
            )

        if opts.index_url:
            finder.index_urls = [opts.index_url]
        if opts.use_wheel is False:
            finder.use_wheel = False
            pip9.index.fmt_ctl_no_use_wheel(finder.format_control)
        if opts.no_index is True:
            finder.index_urls = []
        if opts.extra_index_urls:
            finder.index_urls.extend(opts.extra_index_urls)
        if opts.find_links:
            # FIXME: it would be nice to keep track of the source
            # of the find_links: support a find-links local path
            # relative to a requirements file.
            value = opts.find_links[0]
            req_dir = os.path.dirname(os.path.abspath(filename))
            relative_to_reqs_file = os.path.join(req_dir, value)
            if os.path.exists(relative_to_reqs_file):
                value = relative_to_reqs_file
            finder.find_links.append(value)
        if opts.pre:
            finder.allow_all_prereleases = True
        if opts.process_dependency_links:
            finder.process_dependency_links = True
        if opts.trusted_hosts:
            finder.secure_origins.extend(
                ("*", host, "*") for host in opts.trusted_hosts)