Пример #1
0
def abort_if_invalid_filename(name, filename):
    """Reject ``filename`` unless it is a valid archive for project ``name``.

    ``abort_submit(400, ...)`` is called when ``filename`` is not accepted by
    ``is_valid_archive_name`` and when the normalized filename does not begin
    with the normalized project name.  Nothing is returned.
    """
    archive_ok = is_valid_archive_name(filename)
    if not archive_ok:
        invalid_message = "%r is not a valid archive name" % (filename)
        abort_submit(400, invalid_message)
    normalized_filename = normalize_name(filename)
    expected_prefix = normalize_name(name)
    if not normalized_filename.startswith(expected_prefix):
        abort_submit(
            400,
            "filename %r does not match project name %r" % (filename, name))
Пример #2
0
def abort_if_invalid_filename(name, filename):
    """Abort the submission when ``filename`` does not fit project ``name``.

    Two checks are made, each reporting through ``abort_submit(400, ...)``:
    the filename has to pass ``is_valid_archive_name``, and its normalized
    form has to start with the normalized project name.
    """
    if not is_valid_archive_name(filename):
        abort_submit(400, "%r is not a valid archive name" % (filename))
    belongs_to_project = normalize_name(filename).startswith(
        normalize_name(name))
    if belongs_to_project:
        return
    mismatch = "filename %r does not match project name %r" % (filename, name)
    abort_submit(400, mismatch)
Пример #3
0
 def process_sub_hits(self, stage, sub_hits, data):
     """Add a title, a highlight and (where possible) a URL to search sub-hits.

     Sub-hits of type ``project`` or of an unknown type are dropped, and so
     are metadata sub-hits for which no version data or no matching text is
     available.  The remaining sub-hit dicts are updated in place and
     returned as a list, in their original order.
     """
     index = self.request.registry['search_index']
     doc_kinds = ('title', 'page')
     metadata_kinds = ('keywords', 'description', 'summary')
     processed = []
     for hit in sub_hits:
         info = hit['data']
         kind = info['type']
         heading = kind.title()
         snippet = None
         if kind == 'project':
             continue
         if kind in doc_kinds:
             docs = self.get_docs(stage, data)
             try:
                 entry = docs[info['text_path']]
             except KeyError:
                 # The documentation entry is gone: show an explanatory
                 # message in place of a highlighted excerpt.
                 template = (
                     "Couldn't access documentation files for %s "
                     "version %s on %s. This is a bug. If you find a way "
                     "to reproduce this, please file an issue at: "
                     "https://github.com/devpi/devpi/issues")
                 snippet = template % (
                     data['name'], data['doc_version'], stage.name)
             else:
                 snippet = index.highlight(entry['text'], hit.get('words'))
             heading = info.get('text_title', heading)
             relative_path = info.get('text_path')
             if relative_path:
                 hit['url'] = self.request.route_url(
                     "docviewroot",
                     user=data['user'],
                     index=data['index'],
                     project=normalize_name(data['name']),
                     version=data['doc_version'],
                     relpath="%s.html" % relative_path)
         elif kind in metadata_kinds:
             versiondata = self.get_versiondata(stage, data)
             if versiondata is None:
                 continue
             body = versiondata.get(kind)
             if body is None:
                 continue
             snippet = index.highlight(body, hit.get('words'))
             if 'version' in data:
                 hit['url'] = self.request.route_url(
                     "/{user}/{index}/{project}/{version}",
                     user=data['user'],
                     index=data['index'],
                     project=normalize_name(data['name']),
                     version=data['version'],
                     _anchor=kind)
         else:
             log.error("Unknown type %s" % kind)
             continue
         hit['title'] = heading
         hit['highlight'] = snippet
         processed.append(hit)
     return processed
Пример #4
0
 def get_project_info_perstage(self, name):
     """Look up ``name`` among this stage's projects via normalized names.

     Returns ``ProjectInfo(self, realname)`` for the stored project name
     whose normalized form equals the normalized ``name``, or ``None`` when
     no such project is found.
     """
     assert py.builtin._istext(name)
     stored_names = self.getprojectnames_perstage()
     by_normalized = {}
     for stored in stored_names:
         by_normalized[normalize_name(stored)] = stored
     match = by_normalized.get(normalize_name(name))
     if not match:
         return None
     return ProjectInfo(self, match)
Пример #5
0
 def get_project_info_perstage(self, name):
     """Return a ``ProjectInfo`` for ``name``, or ``None`` if it is unknown.

     Project names of this stage are compared to ``name`` by their
     normalized form; the ``ProjectInfo`` is built from the stored spelling.
     """
     assert py.builtin._istext(name)
     lookup = dict(
         (normalize_name(candidate), candidate)
         for candidate in self.getprojectnames_perstage())
     found = lookup.get(normalize_name(name), None)
     return ProjectInfo(self, found) if found else None
Пример #6
0
 def get_projectname_perstage(self, name):
     """Resolve ``name`` to a project name that exists on this stage.

     An exact match is returned unchanged.  Otherwise the first existing
     project name with the same normalized form is returned, and ``None``
     when there is none.
     """
     assert py.builtin._istext(name)
     candidates = self.list_projectnames_perstage()
     if name not in candidates:
         target = normalize_name(name)
         for candidate in candidates:
             if normalize_name(candidate) == target:
                 return candidate
         return None
     return name
Пример #7
0
 def get_projectname_perstage(self, name):
     """Return the existing spelling of project ``name`` on this stage.

     ``name`` may be given in a non-canonical form.  If it is not listed
     literally, the listed names are searched for one that normalizes to
     the same value; ``None`` is returned when nothing matches.
     """
     assert py.builtin._istext(name)
     existing_names = self.list_projectnames_perstage()
     if name in existing_names:
         return name
     wanted = normalize_name(name)
     matches = (
         existing for existing in existing_names
         if normalize_name(existing) == wanted)
     return next(matches, None)
Пример #8
0
 def process_sub_hits(self, stage, sub_hits, data):
     """Add a title, a highlight and (where possible) a URL to search sub-hits.

     Sub-hits of type ``project`` or of an unknown type are dropped, and so
     are metadata sub-hits for which no version data or no matching text is
     available.  The remaining sub-hit dicts are updated in place and
     returned as a list, in their original order.

     A documentation sub-hit whose entry cannot be looked up no longer
     raises ``KeyError``; it is kept and its highlight explains the problem.
     """
     search_index = self.request.registry['search_index']
     result = []
     for sub_hit in sub_hits:
         sub_data = sub_hit['data']
         text_type = sub_data['type']
         title = text_type.title()
         highlight = None
         if text_type == 'project':
             continue
         elif text_type in ('title', 'page'):
             docs = self.get_docs(stage, data)
             try:
                 entry = docs[sub_data['text_path']]
             except KeyError:
                 # The search index can refer to documentation that is not
                 # accessible; report that in the hit instead of failing
                 # the whole result list.
                 highlight = (
                     "Couldn't access documentation files for %s "
                     "version %s on %s. This is a bug. If you find a way "
                     "to reproduce this, please file an issue at: "
                     "https://github.com/devpi/devpi/issues" %
                     (data['name'], data['doc_version'], stage.name))
             else:
                 text = entry['text']
                 highlight = search_index.highlight(text,
                                                    sub_hit.get('words'))
             title = sub_data.get('text_title', title)
             text_path = sub_data.get('text_path')
             if text_path:
                 sub_hit['url'] = self.request.route_url(
                     "docviewroot",
                     user=data['user'],
                     index=data['index'],
                     project=normalize_name(data['name']),
                     version=data['doc_version'],
                     relpath="%s.html" % text_path)
         elif text_type in ('keywords', 'description', 'summary'):
             metadata = self.get_versiondata(stage, data)
             if metadata is None:
                 continue
             text = metadata.get(text_type)
             if text is None:
                 continue
             highlight = search_index.highlight(text, sub_hit.get('words'))
             if 'version' in data:
                 sub_hit['url'] = self.request.route_url(
                     "/{user}/{index}/{project}/{version}",
                     user=data['user'],
                     index=data['index'],
                     project=normalize_name(data['name']),
                     version=data['version'],
                     _anchor=text_type)
         else:
             log.error("Unknown type %s" % text_type)
             continue
         sub_hit['title'] = title
         sub_hit['highlight'] = highlight
         result.append(sub_hit)
     return result
Пример #9
0
def preprocess_project(stage, name_input):
    """Collect the data describing project ``name_input`` on ``stage``.

    The result is a dict holding at least the normalized ``name`` plus
    ``user`` and ``index``.  If ``is_project_cached`` reports the project as
    cached, the version data of the first version returned by
    ``get_sorted_versions`` is merged in, and ``doc_version``/``+doczip``
    are set from the first version in that order that has ``doczip`` links.
    Setuptools metadata fields that are empty or ``'UNKNOWN'`` are removed.
    """
    name = normalize_name(name_input)
    try:
        user = stage.user.name
        index = stage.index
    except AttributeError:
        # Without user/index attributes, take both parts from the stage name.
        user, index = stage.name.split('/')
    if not is_project_cached(stage, name):
        return dict(name=name, user=user, index=index)
    sorted_versions = get_sorted_versions(stage.list_versions_perstage(name))
    result = dict(name=name)
    for position, version in enumerate(sorted_versions):
        if position == 0:
            result.update(stage.get_versiondata_perstage(name, version))
        linkstore = stage.get_linkstore_perstage(name, version)
        if not linkstore.get_links(rel="doczip"):
            assert '+doczip' not in result
            continue
        # we assume it has been unpacked
        result['doc_version'] = version
        result['+doczip'] = Docs(stage, name, version)
        break

    result[u'user'] = user
    result[u'index'] = index
    setuptools_metadata = frozenset((
        'author', 'author_email', 'classifiers', 'description', 'download_url',
        'home_page', 'keywords', 'license', 'platform', 'summary'))
    for field in setuptools_metadata:
        if field not in result:
            continue
        current = result[field]
        if current == 'UNKNOWN' or not current:
            del result[field]
    return result
Пример #10
0
    def dump(self):
        """Export the projects, release files, tox results and docs of the stage.

        For a stage whose ``ixconfig`` type is ``"mirror"`` no projects are
        dumped.  Otherwise ``indexmeta["projects"]`` and ``indexmeta["files"]``
        are reset, and every project's version data (without ``+elinks``) is
        stored under its normalized name.  The exporter is notified of
        completion in both cases.
        """
        is_mirror = self.stage.ixconfig["type"] == "mirror"
        if is_mirror:
            project_names = []
        else:
            self.indexmeta["projects"] = {}
            self.indexmeta["files"] = []
            project_names = self.stage.list_projects_perstage()
        for project in project_names:
            per_version = {}
            for ver in self.stage.list_versions_perstage(project):
                stored = self.stage.get_versiondata_perstage(project, ver)
                per_version[ver] = get_mutable_deepcopy(stored)
            for copied in per_version.values():
                copied.pop("+elinks", None)
            project_key = normalize_name(project)
            assert project_key not in self.indexmeta["projects"]
            self.indexmeta["projects"][project_key] = per_version

            for ver, verdata in per_version.items():
                vername = verdata["name"]
                linkstore = self.stage.get_linkstore_perstage(vername, ver)
                self.basedir.ensure(dir=1)
                self.dump_releasefiles(linkstore)
                self.dump_toxresults(linkstore)
                doczip_entry = self.stage.get_doczip_entry(vername, ver)
                if doczip_entry:
                    self.dump_docfile(vername, ver, doczip_entry)
        self.exporter.completed("index %r" % self.stage.name)
Пример #11
0
 def store_releasefile(self, project, version, filename, content,
                       last_modified=None):
     """Attach a release file to a registered project version.

     ``MissesRegistration`` is raised when no version data exists for the
     normalized project and ``version`` (also after retrying with
     underscores in ``version`` replaced by dashes).  On success the simple
     links of the project are regenerated and the new link is returned.
     """
     project = normalize_name(project)
     filename = ensure_unicode(filename)
     if not self.get_versiondata_perstage(project, version):
         # There's a chance the version was guessed from the filename,
         # which might have swapped dashes to underscores; without any
         # underscore there is nothing left to retry.
         if '_' not in version:
             raise MissesRegistration("%s-%s", project, version)
         version = version.replace('_', '-')
         if not self.get_versiondata_perstage(project, version):
             raise MissesRegistration("%s-%s", project, version)
     store = self.get_linkstore_perstage(project, version, readonly=False)
     created = store.create_linked_entry(
         rel="releasefile",
         basename=filename,
         file_content=content,
         last_modified=last_modified)
     self._regen_simplelinks(project)
     return created
Пример #12
0
 def init_pypi_mirror(self, proxy):
     """Initialize the pypi mirror state, fetching it if nothing is cached.

     The name/serial mapping is read from ``self.keyfs.PYPISERIALS``; if it
     is empty it is retrieved through ``proxy`` and stored.  Afterwards
     ``self.name2serials`` holds the mapping with text keys and
     ``self.normname2name`` maps each normalized name that differs from
     its original to that original.
     """
     self.proxy = proxy
     serials = self.keyfs.PYPISERIALS.get({})
     if serials:
         log.info("reusing already cached name/serial list")
     else:
         log.info("retrieving initial name/serial list")
         serials = proxy.list_packages_with_serial()
         if serials is None:
             from devpi_server.main import fatal
             fatal("mirror initialization failed: "
                   "pypi.python.org not reachable")
         self.keyfs.PYPISERIALS.set(serials)
     # make sure every key of the mapping is a text string
     for key in list(serials):
         if py.builtin._istext(key):
             continue
         serial = serials.pop(key)
         serials[py.builtin._totext(key, "utf-8")] = serial
     self.name2serials = serials
     # remember the real name for every name that normalization changes
     self.normname2name = realnames = dict()
     for key in serials:
         canonical = normalize_name(key)
         if canonical != key:
             realnames[canonical] = key
Пример #13
0
 def init_pypi_mirror(self, proxy):
     """Load or fetch the name/serial list and build the normalization table.

     An empty cached list triggers ``proxy.list_packages_with_serial()``;
     a ``None`` answer is reported through ``fatal``.  Keys that are not
     text are converted with UTF-8, and the result is kept in
     ``self.name2serials`` and ``self.normname2name``.
     """
     self.proxy = proxy
     cached = self.keyfs.PYPISERIALS.get({})
     if not cached:
         log.info("retrieving initial name/serial list")
         cached = proxy.list_packages_with_serial()
         if cached is None:
             from devpi_server.main import fatal
             fatal("mirror initialization failed: "
                   "pypi.python.org not reachable")
         self.keyfs.PYPISERIALS.set(cached)
     else:
         log.info("reusing already cached name/serial list")
     # normalize to a text->serial mapping
     non_text = [raw for raw in cached if not py.builtin._istext(raw)]
     for raw in non_text:
         cached[py.builtin._totext(raw, "utf-8")] = cached.pop(raw)
     self.name2serials = cached
     # map each normalized name that differs from its real name to it
     self.normname2name = mapping = dict()
     for realname in cached:
         normalized = normalize_name(realname)
         if normalized != realname:
             mapping[normalized] = realname
Пример #14
0
 def parse_index(self, disturl, html, scrape=True):
     """Collect egg links, archive links and crawl targets from ``html``.

     Every link with a valid http URL is examined.  With ``scrape`` enabled,
     links carrying an egg fragment are put at the front of
     ``self.egglinks`` when the normalized fragment starts with
     ``self.projectname``.  Archives of the project are handed to
     ``self._mergelink_ifbetter``.  With ``scrape`` enabled, ``rel`` links
     that were not merged are added to ``self.crawllinks``.
     """
     page = HTMLPage(html, disturl.url)
     merged_urls = set()
     for anchor in page.links:
         candidate = URL(anchor.url)
         if not candidate.is_valid_http_url():
             continue
         fragment = candidate.eggfragment
         if scrape and fragment:
             if not normalize_name(fragment).startswith(self.projectname):
                 log.debug("skip egg link %s (projectname: %s)",
                           candidate, self.projectname)
             elif candidate not in self.egglinks:
                 # XXX seems we have to maintain a particular
                 # order to keep pip/easy_install happy with some
                 # packages (e.g. nose)
                 self.egglinks.insert(0, candidate)
             continue
         if not is_archive_of_project(candidate, self.projectname):
             continue
         if candidate.is_valid_http_url():
             merged_urls.add(candidate.url)
             self._mergelink_ifbetter(candidate)
         else:
             log.warn("unparseable/unsupported url: %r", candidate)
     if not scrape:
         return
     for rel_link in page.rel_links():
         if rel_link.url in merged_urls:
             continue
         target = URL(rel_link.url)
         if target.is_valid_http_url():
             self.crawllinks.add(target)
Пример #15
0
 def get_releaselinks_perstage(self, project):
     """Return one elink per simple link of ``project`` on this stage."""
     # compatibility access method for devpi-findlinks and possibly other plugins
     project = normalize_name(project)
     elinks = []
     for key, href in self.get_simplelinks_perstage(project):
         elinks.append(self._make_elink(project, key, href))
     return elinks
Пример #16
0
 def parse_index(self, disturl, html, scrape=True):
     """Sort the links found in ``html`` into egg, release and crawl links.

     Links without a valid http URL are ignored.  When ``scrape`` is true,
     egg-fragment links are only considered for ``self.egglinks`` and
     ``rel`` links not already merged are queued in ``self.crawllinks``.
     """
     parsed = HTMLPage(html, disturl.url)
     handled = set()
     for link in parsed.links:
         url = URL(link.url)
         if not url.is_valid_http_url():
             continue
         egg = url.eggfragment
         if scrape and egg:
             matches_project = normalize_name(egg).startswith(
                 self.projectname)
             if matches_project:
                 # XXX seems we have to maintain a particular
                 # order to keep pip/easy_install happy with some
                 # packages (e.g. nose)
                 if url not in self.egglinks:
                     self.egglinks.insert(0, url)
             else:
                 log.debug("skip egg link %s (projectname: %s)", url,
                           self.projectname)
             continue
         if is_archive_of_project(url, self.projectname):
             if url.is_valid_http_url():
                 handled.add(url.url)
                 self._mergelink_ifbetter(url)
             else:
                 log.warn("unparseable/unsupported url: %r", url)
     if scrape:
         pending = (rel for rel in parsed.rel_links()
                    if rel.url not in handled)
         for rel in pending:
             crawl_url = URL(rel.url)
             if crawl_url.is_valid_http_url():
                 self.crawllinks.add(crawl_url)
Пример #17
0
 def get_releaselinks(self, project):
     """Return one elink per simple link of the normalized ``project``."""
     # compatibility access method used by devpi-web and tests
     project = normalize_name(project)
     simplelinks = self.get_simplelinks(project)
     collected = []
     for key, href in simplelinks:
         collected.append(self._make_elink(project, key, href))
     return collected
Пример #18
0
 def _set_versiondata(self, metadata):
     """Store ``metadata`` for the project version it names.

     ``metadata`` must provide ``name`` and ``version``.  When the version
     key is not dirty and every metadata value already equals the stored
     one, nothing is written.  Otherwise the version data is updated and
     the version and project are added to their respective sets if missing.
     """
     project = normalize_name(metadata["name"])
     version = metadata["version"]
     version_key = self.key_projversion(project, version)
     stored = version_key.get(readonly=False)
     if not version_key.is_dirty():
         # check if something really changed to prevent
         # unnecessary changes on db/replica level
         changed = any(
             value != stored.get(field)
             for field, value in metadata.items())
         if not changed:
             threadlog.info("not re-registering same metadata for %s-%s",
                            project, version)
             return
     stored.update(metadata)
     version_key.set(stored)
     threadlog.info("set_metadata %s-%s", project, version)
     known_versions = self.key_projversions(project).get(readonly=False)
     if version not in known_versions:
         known_versions.add(version)
         self.key_projversions(project).set(known_versions)
     known_projects = self.key_projects.get(readonly=False)
     if project not in known_projects:
         known_projects.add(project)
         self.key_projects.set(known_projects)
Пример #19
0
 def _save_cache_links(self, project, links, requires_python, yanked,
                       serial):
     """Persist the simple links of ``project`` if they changed.

     ``project`` must already be normalized and ``serial`` an ``int``.  The
     data is only written when it differs from what is stored; in that case
     the project name is also recorded.  The retrieve time of the project is
     refreshed in every case.
     """
     assert links != ()  # we don't store the old "Not Found" marker anymore
     assert isinstance(serial, int)
     assert project == normalize_name(project), project
     fresh = {
         "serial": serial,
         "links": links,
         "requires_python": requires_python,
         "yanked": yanked
     }
     links_key = self.key_projsimplelinks(project)
     if links_key.get() != fresh:
         threadlog.debug("saving changed simplelinks for %s: %s", project,
                         fresh)
         links_key.set(fresh)
         # maintain list of currently cached project names to enable
         # deletion and offline mode
         self.add_project_name(project)
     # XXX if the transaction fails the links are still marked
     # as refreshed but the data was not persisted.  It's a rare
     # enough event (tm) to not worry too much, though.
     # (we can, however, easily add a
     # keyfs.tx.on_commit_success(callback) method.
     self.cache_retrieve_times.refresh(project)
Пример #20
0
 def set_project_serial(self, name, serial):
     """Record ``serial`` for ``name`` and return the normalized name.

     If normalization changes the name, the normalized form is additionally
     mapped to ``name`` in ``self.normname2name``.
     """
     self.name2serials[name] = serial
     normalized = normalize_name(name)
     needs_mapping = normalized != name
     if needs_mapping:
         self.normname2name[normalized] = name
     return normalized
Пример #21
0
 def on_changed_file_entry(self, ev):
     """Call the upload or remove-file hook for a changed file entry.

     The stage is looked up inside a keyfs transaction at the serial of the
     event.  Changes on stages of type ``mirror`` are ignored.  An entry
     without project or version is treated as deleted and reported through
     ``devpiserver_on_remove_file``; otherwise ``devpiserver_on_upload`` is
     called when exactly one link with the entry's basename exists.
     """
     key_params = ev.typedkey.params
     user = key_params.get("user")
     index = key_params.get("index")
     with self.xom.keyfs.transaction(at_serial=ev.at_serial):
         stage = self.xom.model.getstage(user, index)
         if stage is not None and stage.ixconfig["type"] == "mirror":
             return  # we don't trigger on file changes of pypi mirror
         entry = FileEntry(self.xom, ev.typedkey, meta=ev.value)
         if not (entry.project and entry.version):
             # the entry was deleted
             self.xom.config.hook.devpiserver_on_remove_file(
                 stage=stage, relpath=ev.typedkey.relpath)
             return
         project = entry.project
         assert project == normalize_name(project)
         store = stage.get_linkstore_perstage(project, entry.version)
         matching = store.get_links(basename=entry.basename)
         if len(matching) != 1:
             return
         self.xom.config.hook.devpiserver_on_upload(
             stage=stage,
             project=project,
             version=entry.version,
             link=matching[0])
Пример #22
0
 def store_doczip(self, project, version, content):
     """Store ``content`` as the doczip of a project version and return the link.

     Without a ``version`` the latest version of the stage is used, and
     ``MissesVersion`` is raised if there is none.  Version data is
     registered first when the project version has none yet.
     """
     project = normalize_name(project)
     if not version:
         version = self.get_latest_version_perstage(project)
         if not version:
             raise MissesVersion(
                 "doczip has no version and '%s' has no releases to "
                 "derive one from", project)
         threadlog.info("store_doczip: derived version of %s is %s",
                        project, version)
     zip_name = "%s-%s.doc.zip" % (project, version)
     existing = self.get_versiondata_perstage(
         project, version, readonly=False)
     if not existing:
         self.set_versiondata({'name': project, 'version': version})
     store = self.get_linkstore_perstage(project, version, readonly=False)
     return store.create_linked_entry(
         rel="doczip",
         basename=zip_name,
         file_content=content,
     )
Пример #23
0
    def op_sro_check_mirror_whitelist(self, opname, **kw):
        """Call ``opname`` on the stages from ``self.sro()``, honouring mirror whitelists.

        This is a generator yielding ``(stage, result)`` pairs, where
        ``result`` is what ``getattr(stage, opname)(**kw)`` returned.  ``kw``
        must contain a ``project`` entry.

        Once a non-mirror stage was found to hold the project, or an earlier
        call returned a truthy result, stages of type ``mirror`` are skipped
        unless an earlier non-mirror stage lists the project (or ``*``) in
        its ``mirror_whitelist``.

        An ``UpstreamError`` raised while ``stage is self`` propagates; for
        any other stage it is logged as a warning and nothing is yielded
        for that stage.
        """
        project = normalize_name(kw["project"])
        # whitelisted: False, or the non-mirror stage that whitelisted the project.
        # private_hit: becomes truthy once the project was seen on a stage.
        whitelisted = private_hit = False
        for stage in self.sro():
            if stage.ixconfig["type"] == "mirror":
                if private_hit:
                    if not whitelisted:
                        threadlog.debug(
                            "%s: private package %r not whitelisted, "
                            "ignoring %s", opname, project, stage.name)
                        # skip this mirror entirely: no call, no yield
                        continue
                    threadlog.debug(
                        "private package %r whitelisted at stage %s", project,
                        whitelisted.name)
            else:
                whitelist = set(stage.ixconfig["mirror_whitelist"])
                if '*' in whitelist or project in whitelist:
                    whitelisted = stage
                elif stage.has_project_perstage(project):
                    private_hit = True

            try:
                res = getattr(stage, opname)(**kw)
                # a truthy result also counts as a hit for later mirror stages
                private_hit = private_hit or res
                yield stage, res
            except UpstreamError as exc:
                # If we are currently checking ourself raise the error, it is fatal
                if stage is self:
                    raise
                threadlog.warn(
                    'Failed to check mirror whitelist. Assume it does not exists (%s)',
                    exc)
Пример #24
0
 def _regen_simplelinks(self, project_input):
     """Rebuild the stored simple links of a project from its release files.

     For every version of the normalized project, each ``releasefile`` link
     is converted with ``make_key_and_href``; the resulting list is stored
     under the ``links`` key.
     """
     project = normalize_name(project_input)
     collected = []
     for version in self.list_versions_perstage(project):
         store = self.get_linkstore_perstage(project, version)
         for release_link in store.get_links("releasefile"):
             collected.append(make_key_and_href(release_link))
     self.key_projsimplelinks(project).set({"links": collected})
Пример #25
0
 def _dump_project_cache(self, projectname, dumplist, serial):
     """Store serial, entry list and project name under the normalized name."""
     cache_name = normalize_name(projectname)
     payload = {
         "serial": serial,
         "entrylist": dumplist,
         "projectname": projectname
     }
     links_key = self.keyfs.PYPILINKS(name=cache_name)
     links_key.set(payload)
Пример #26
0
 def get_releaselinks(self, project):
     """Return the elinks of ``project``, or ``[]`` if upstream lacks it.

     An ``UpstreamNotFoundError`` raised while the links are collected
     results in an empty list.
     """
     # compatibility access method used by devpi-web and tests
     project = normalize_name(project)
     try:
         elinks = []
         for key, href, require_python in self.get_simplelinks(project):
             elinks.append(
                 self._make_elink(project, key, href, require_python))
     except self.UpstreamNotFoundError:
         return []
     return elinks
Пример #27
0
def get_unpack_path(stage, name, version):
    """Return the ``+doc`` directory path for a project version of ``stage``.

    The base directory is the configured ``documentation_path`` if it is
    set, otherwise ``stage.keyfs.basedir``.
    """
    configured = stage.xom.config.args.documentation_path
    base = (stage.keyfs.basedir if configured is None
            else py.path.local(configured))
    return base.join(
        stage.user.name, stage.index, normalize_name(name), version, "+doc")
Пример #28
0
 def iter_projects_normalized(self, projects):
     """Yield ``(normalized_name, versions)`` for the given projects.

     Project names that normalize to the same value are grouped, and the
     version mappings of all names in a group are merged into one dict.
     """
     spellings = {}
     for original in projects:
         key = normalize_name(original)
         if key not in spellings:
             spellings[key] = set()
         spellings[key].add(original)
     for key, originals in spellings.items():
         merged = {}
         for original in originals:
             merged.update(projects[original])
         yield (key, merged)
Пример #29
0
 def del_project(self, project):
     """Delete the normalized ``project`` and all of its versions.

     Version data is removed first (without per-version cleanup), then the
     simple links are regenerated, the project is taken out of the project
     set and its version key is deleted.
     """
     project = normalize_name(project)
     known_versions = list(self.key_projversions(project).get())
     for version in known_versions:
         self.del_versiondata(project, version, cleanup=False)
     self._regen_simplelinks(project)
     with self.key_projects.update() as project_names:
         project_names.remove(project)
     threadlog.info("deleting project %s", project)
     self.key_projversions(project).delete()
Пример #30
0
 def __init__(self, stage, project, version, readonly=True):
     """Bind to one project version of ``stage``.

     The project name is stored normalized.  ``MissesRegistration`` is
     raised when the stage has no version data for the project version.
     """
     self.stage = stage
     self.filestore = stage.filestore
     self.project = normalize_name(project)
     self.version = version
     self.verdata = stage.get_versiondata_perstage(
         self.project, version, readonly=readonly)
     if self.verdata:
         return
     raise MissesRegistration(
         "%s-%s on stage %s", project, version, stage.name)
Пример #31
0
 def _set_project_serial(self, name, serial):
     """Store ``serial`` for ``name``; extend the normalization table if new.

     The normalization table is only touched for names that were not yet
     present in ``self.name2serials``.
     """
     already_known = name in self.name2serials
     self.name2serials[name] = serial
     if already_known:
         return
     normalized = normalize_name(name)
     if normalized != name:
         self.normname2name[normalized] = name
Пример #32
0
 def init_pypi_mirror(self, proxy):
     """Load the name/serial mapping and build the normalization table.

     ``self.name2serials`` is set from ``self.load_name2serials(proxy)``;
     ``self.normname2name`` maps every normalized name that differs from
     its real name to that real name.
     """
     self.name2serials = self.load_name2serials(proxy)
     # create a mapping of normalized name to real name
     self.normname2name = realnames = dict()
     for realname in self.name2serials:
         normalized = normalize_name(realname)
         assert py.builtin._istext(normalized)
         assert py.builtin._istext(realname)
         if normalized != realname:
             realnames[normalized] = realname
Пример #33
0
 def _dump_project_cache(self, projectname, entries, serial):
     """Store the link data of a project and return its elinks as a list.

     Each entry is reduced to ``(relpath, md5, eggfragment)``; the data is
     saved under the normalized project name.
     """
     cache_name = normalize_name(projectname)
     dumplist = []
     for entry in entries:
         dumplist.append((entry.relpath, entry.md5, entry.eggfragment))
     payload = {"serial": serial,
                "latest_serial": serial,
                "entrylist": dumplist,
                "projectname": projectname}
     threadlog.debug("saving data for %s: %s", projectname, payload)
     self.keyfs.PYPILINKS(name=cache_name).set(payload)
     elinks = self._make_elinks(projectname, payload["entrylist"])
     return list(elinks)
Пример #34
0
 def init_pypi_mirror(self, proxy):
     """Set ``name2serials`` and ``normname2name`` for the mirror.

     The mapping comes from ``self.load_name2serials(proxy)``.  Names that
     are unchanged by normalization get no ``normname2name`` entry.
     """
     serials = self.load_name2serials(proxy)
     self.name2serials = serials
     # create a mapping of normalized name to real name
     table = dict()
     self.normname2name = table
     for project_name in self.name2serials:
         canonical = normalize_name(project_name)
         assert py.builtin._istext(canonical)
         assert py.builtin._istext(project_name)
         if canonical != project_name:
             table[canonical] = project_name
Пример #35
0
 def _set_project_serial(self, name, serial):
     """Set the current serial of ``name``.

     For a name not seen before, the normalization table additionally
     receives an entry if the normalized name differs from ``name``.
     """
     is_new = name not in self.name2serials
     self.name2serials[name] = serial
     if is_new:
         canonical = normalize_name(name)
         if canonical != name:
             self.normname2name[canonical] = name
Пример #36
0
 def _dump_project_cache(self, projectname, entries, serial):
     """Save the link data of ``projectname`` and return the elinks built from it.

     Entries are stored as ``(relpath, hash_spec, eggfragment)`` tuples
     under the normalized project name, together with the serial.
     """
     storage_name = normalize_name(projectname)
     entrylist = [
         (item.relpath, item.hash_spec, item.eggfragment)
         for item in entries
     ]
     record = {"serial": serial,
               "latest_serial": serial,
               "entrylist": entrylist,
               "projectname": projectname}
     threadlog.debug("saving data for %s: %s", projectname, record)
     links_key = self.keyfs.PYPILINKS(name=storage_name)
     links_key.set(record)
     return list(self._make_elinks(projectname, record["entrylist"]))
Пример #37
0
 def result(self):
     """Return the search result with display data added to every item.

     Items whose stage cannot be resolved are dropped.  Each kept item gets
     a ``url``, a ``title`` and processed ``sub_hits``, plus ``more_url``
     and ``more_count`` when collapsed results exist for its path.
     ``None`` is returned when there is no result or no item is kept.
     """
     found = self.search_result
     if not found or not found['items']:
         return
     kept = []
     for item in found['items']:
         data = item['data']
         stage = self.get_stage(data['path'])
         if stage is None:
             continue
         if 'version' not in data:
             item['url'] = self.request.route_url(
                 "/{user}/{index}/{project}",
                 user=data['user'],
                 index=data['index'],
                 project=normalize_name(data['name']))
             item['title'] = data['name']
         else:
             item['url'] = self.request.route_url(
                 "/{user}/{index}/{project}/{version}",
                 user=data['user'],
                 index=data['index'],
                 project=normalize_name(data['name']),
                 version=data['version'])
             item['title'] = "%s-%s" % (data['name'], data['version'])
         item['sub_hits'] = self.process_sub_hits(
             stage, item['sub_hits'], data)
         collapsed = found['info']['collapsed_counts'][data['path']]
         if collapsed:
             # link to a search restricted to the path of this item
             narrowed = dict(self.params)
             narrowed['query'] = "%s path:%s" % (
                 self.params['query'], data['path'])
             item['more_url'] = self.request.route_url(
                 'search', _query=narrowed)
             item['more_count'] = collapsed
         kept.append(item)
     if not kept:
         return
     found['items'] = kept
     return found
Пример #38
0
def get_docs_info(request, stage, metadata):
    """ return a dict with ``title`` and ``url`` of a release's docs.

    None is returned for stages of type 'mirror' and when the unpack
    path for the normalized project name and version does not exist.
    """
    if stage.ixconfig['type'] == 'mirror':
        return
    name = normalize_name(metadata["name"])
    ver = metadata["version"]
    if not get_unpack_path(stage, name, ver).exists():
        return
    title = "%s-%s" % (name, ver)
    url = request.route_url(
        "docviewroot",
        user=stage.user.name,
        index=stage.index,
        project=name,
        version=ver,
        relpath="index.html")
    return dict(title=title, url=url)
Пример #39
0
    def compute_global_projectname_normalization(self):
        """ fill ``self.norm2name`` with normalized name -> real name.

        For every normalized project name the real name is taken from
        the highest version found across all stages of all users.
        Names listed by the "root/pypi" stage are registered with an
        artificially high version.
        """
        self.tw.line("computing global projectname normalization map")

        highest = {}
        # compute latest normname version across all stages
        for user in self.xom.model.get_userlist():
            for indexname in user.get().get("indexes", []):
                stage = self.xom.model.getstage(user.name, indexname)
                for name in stage.list_projectnames_perstage():
                    # pypi names take precedence for defining the realname
                    if stage.name == "root/pypi":
                        pinned = Version("999999.99999")
                        pinned.realname = name
                        highest[normalize_name(name)] = pinned
                        continue
                    versions = stage.list_versions_perstage(name)
                    if not versions:
                        continue
                    maxver = None
                    for ver in versions:
                        candidate = Version(ver)
                        verdata = stage.get_versiondata(name, ver)
                        candidate.realname = verdata.get("name", name)
                        if maxver is None or candidate > maxver:
                            maxver = candidate
                    if not maxver:
                        continue
                    norm = normalize_name(name)
                    known = highest.setdefault(norm, maxver)
                    if maxver > known:
                        highest[norm] = maxver

        # determine real name of a project
        self.norm2name = mapping = {}
        for norm, winner in highest.items():
            mapping[norm] = winner.realname
Пример #40
0
    def filtered_list_project(self):
        """ return a simple-page response with release-filtered links.

        Only links whose version (derived from the link's archive
        basename) is contained in the project's release filter are
        served.  Aborts with 404 if the release filter has no entry for
        the project and with 502 if fetching the links raises
        ``stage.UpstreamError``.  For stages of type 'mirror' the
        ``X-PYPI-LAST-SERIAL`` header is set when a positive serial is
        stored for the project.
        """
        request = self.request
        abort_if_invalid_project(request, request.matchdict["project"])
        project = self.context.project
        # we only serve absolute links so we don't care about the route's slash
        stage = self.context.stage
        releasefilter = get_release_filter(stage).get(project)
        if releasefilter is None:
            # NOTE(review): abort() is expected to raise; the code below
            # cannot cope with a None release filter
            abort(self.request, 404, "The project %s does not exist." % (project))

        try:
            links = stage.get_simplelinks(project, sorted_links=False)
        except stage.UpstreamError as e:
            threadlog.error(e.msg)
            abort(request, 502, e.msg)

        result = []
        for key, url in links:
            # try every split of the basename into "<name>-<version>" and
            # take the first one whose normalized name equals the project
            parts = splitext_archive(key)[0].split('-')
            for index in range(1, len(parts)):
                name = normalize_name('-'.join(parts[:index]))
                if name == project:
                    version = '-'.join(parts[index:])
                    break
            else:
                # no split matches the project name, skip this link
                continue
            if version in releasefilter:
                result.append((key, url))

        if not result:
            self.request.context.verified_project  # access will trigger 404 if not found

        # we don't need the extra stuff on the simple page for pip
        embed_form = False
        blocked_index = None
        response = Response(body=b"".join(self._simple_list_project(
            stage, project, result, embed_form, blocked_index)))
        if stage.ixconfig['type'] == 'mirror':
            serial = stage.key_projsimplelinks(project).get().get("serial")
            # the stored data may lack a serial; comparing None with an
            # int raises TypeError on Python 3
            if serial is not None and serial > 0:
                response.headers[str("X-PYPI-LAST-SERIAL")] = str(serial)
        return response
Пример #41
0
    def set_project_serial(self, name, serial):
        """ record ``serial`` for a project and maintain the name mapping.

        ``name`` is normally the real (non-normalized) project name.
        If the normalized form is already registered, the registered
        real name is used instead, so a normalized name may be passed
        for known projects.  A ``serial`` of None removes the project.

        Returns the normalized project name.
        """
        normname = normalize_name(name)
        # an already registered real name wins over what was passed in
        name = self.normname2name.get(normname, name)

        if serial is not None:
            self.name2serials[name] = serial
            if normname != name:
                self.normname2name[normname] = name
            return normname

        del self.name2serials[name]
        self.normname2name.pop(normname, None)
        return normname
Пример #42
0
    def dump(self):
        """ export the version data and files of every project of the stage.

        For each project the per-version data (without the "+elinks"
        entry) is stored in ``self.indexmeta["projects"]`` under the
        normalized project name; then release files, tox results and,
        if present, the doczip entry of every version are dumped.
        """
        import copy
        stage = self.stage
        for name in stage.list_projectnames_perstage():
            data = {}
            for version in stage.list_versions_perstage(name):
                # work on a copy so that stripping "+elinks" does not
                # touch the stage's own data
                verdata = copy.deepcopy(
                    stage.get_versiondata_perstage(name, version))
                verdata.pop("+elinks", None)
                data[version] = verdata
            norm_name = normalize_name(name)
            assert norm_name not in self.indexmeta["projects"]
            self.indexmeta["projects"][norm_name] = data

            for version, verdata in data.items():
                vername = verdata["name"]
                linkstore = stage.get_linkstore_perstage(vername, version)
                self.dump_releasefiles(linkstore)
                self.dump_toxresults(linkstore)
                doczip = stage.get_doczip_entry(vername, version)
                if doczip:
                    self.dump_docfile(vername, version, doczip)
        self.exporter.completed("index %r" % stage.name)
Пример #43
0
 def clear_cache(self, projectname):
     """ reset the stored links data of ``projectname`` to an empty dict. """
     linkskey = self.keyfs.PYPILINKS(name=normalize_name(projectname))
     # store an empty dict rather than deleting the key, so that
     # replicas behave correctly
     linkskey.set({})
     threadlog.debug("cleared cache for %s", projectname)
Пример #44
0
    def get_releaselinks_perstage(self, projectname):
        """ return all releaselinks from the index and referenced scrape
        pages, returning cached entries if we have a recent enough
        request stored locally.

        Returns an empty list if the project name cannot be resolved
        for this stage.  If upstream does not answer with status 200
        but cached links exist, the cached links are returned.

        Raise UpstreamError if the pypi server cannot be reached or
        does not return a fresh enough page although we know it must
        exist.
        """
        projectname = self.get_projectname_perstage(projectname)
        if projectname is None:
            return []
        is_fresh, links = self._load_cache_links(projectname)
        if links is not None and is_fresh:
            return links

        # get the simple page for the project
        url = self.PYPIURL_SIMPLE + projectname + "/"
        threadlog.debug("visiting index %s", url)
        response = self.httpget(url, allow_redirects=True)
        if response.status_code != 200:
            # if we have an old version, return it instead of erroring out
            if links is not None:
                threadlog.error("serving stale links for %r, upstream not reachable",
                                projectname)
                return links
            # XXX it's not correct to return UpstreamError in all cases
            # if indeed the project was deleted but that fact
            # is not yet properly processed
            raise self.UpstreamError("%s status on GET %s" %
                                     (response.status_code, url))

        if self.xom.is_replica():
            # XXX this code path is not currently tested, handle with care!
            # we have already triggered the master above
            # and now need to wait until the parsed new links are
            # transferred back to the replica
            devpi_serial = int(response.headers["X-DEVPI-SERIAL"])
            self.keyfs.notifier.wait_tx_serial(devpi_serial)
            # XXX raise TransactionRestart to get a consistent clean view
            self.keyfs.commit_transaction_in_thread()
            self.keyfs.begin_transaction_in_thread()
            is_fresh, links = self._load_cache_links(projectname)
            if links is not None:
                return links
            raise self.UpstreamError("no cache links from master for %s" %
                                     projectname)

        # check that we got a fresh enough page
        serial = int(response.headers["X-PYPI-LAST-SERIAL"])
        newest_serial = self.pypimirror.name2serials.get(projectname, -1)
        if serial < newest_serial:
            # format the message here, like the other raise sites in this
            # method, instead of relying on the exception class to
            # interpolate extra positional arguments
            raise self.UpstreamError(
                "%s: pypi returned serial %s, expected %s" %
                (projectname, serial, newest_serial))

        threadlog.debug("%s: got response with serial %s",
                        projectname, serial)

        # check returned url has the same normalized name
        ret_projectname = response.url.strip("/").split("/")[-1]
        assert normalize_name(projectname) == normalize_name(ret_projectname)

        # parse simple index's link and perform crawling
        assert response.text is not None, response.text
        result = parse_index(response.url, response.text)
        perform_crawling(self, result)
        releaselinks = list(result.releaselinks)

        self.keyfs.restart_as_write_transaction()

        # compute release link entries and cache according to serial
        entries = [self.filestore.maplink(link) for link in releaselinks]
        return self._dump_project_cache(projectname, entries, serial)
Пример #45
0
 def get_registered_name(self, name):
     """ return the registered name for ``name``, or None if unknown.

     The normalized name is mapped through ``normname2name`` (falling
     back to the normalized name itself) and only returned if it has
     an entry in ``name2serials``.
     """
     normname = normalize_name(name)
     registered = self.normname2name.get(normname, normname)
     return registered if registered in self.name2serials else None
Пример #46
0
 def _dump_project_cache(self, projectname, dumplist, serial):
     """ store serial, entry list and project name for ``projectname``.

     The data is written to the PYPILINKS key of the normalized
     project name.
     """
     linkskey = self.keyfs.PYPILINKS(name=normalize_name(projectname))
     linkskey.set({
         "serial": serial,
         "entrylist": dumplist,
         "projectname": projectname,
     })
Пример #47
0
 def get_real_projectname(self, name):
     """ return the ``norm2name`` entry for the normalized ``name``.

     Raises KeyError if the normalized name is not in ``norm2name``.
     """
     return self.norm2name[normalize_name(name)]
Пример #48
0
 def _load_project_cache(self, projectname):
     """ return the PYPILINKS value of the project, passing None to ``get``. """
     linkskey = self.keyfs.PYPILINKS(name=normalize_name(projectname))
     return linkskey.get(None)
Пример #49
0
 def get_project_info(self, name):
     """ return a ProjectInfo for ``name``, or None if it is not registered.

     The normalized name is mapped through ``normname2name`` (falling
     back to the normalized name itself); a ProjectInfo is only built
     if the resulting name has an entry in ``name2serials``.
     """
     normname = normalize_name(name)
     registered = self.normname2name.get(normname, normname)
     if registered not in self.name2serials:
         return None
     return ProjectInfo(self, registered)
Пример #50
0
 def __init__(self, projectname):
     """ remember the normalized project name and start with empty
     link containers.
     """
     normalized = normalize_name(projectname)
     self.projectname = normalized
     self.basename2link = dict()
     self.crawllinks = set()
     self.egglinks = list()
Пример #51
0
 def _load_project_cache(self, projectname):
     """ return the value stored under the project's PYPILINKS key. """
     linkskey = self.keyfs.PYPILINKS(name=normalize_name(projectname))
     return linkskey.get()
Пример #52
0
 def key_projversion(self, name, version):
     """ return the PROJVERSION key for a project version of this stage.

     The key is built from this stage's user name and index, the
     normalized project name and the given version.
     """
     normname = normalize_name(name)
     return self.keyfs.PROJVERSION(
         user=self.user.name,
         index=self.index,
         name=normname,
         version=version)