def test_default_store_with_one_store(self):
    """
    Check that a manager configured with a single store, and no explicit
    ``default`` entry, reports that store as its default driver.
    """
    settings = {'stores': {'dict': {'driver': 'dict'}}}
    manager = CacheManager(settings)

    default_driver = manager.get_default_driver()

    self.assertEqual('dict', default_driver)
def test_extend_accepts_a_callable_returning_a_repository(self):
    """
    Check that ``extend`` accepts a factory callable which returns a
    ready-made ``Repository`` wrapping the custom store.
    """
    settings = {
        'default': 'my-driver',
        'stores': {'my-driver': {'driver': 'my-driver'}},
    }
    cache = CacheManager(settings)

    def make_repository(config):
        # The store configuration is not needed to build the custom store.
        return Repository(CustomStore())

    cache.extend('my-driver', make_repository)

    resolved_store = cache.store().get_store()
    self.assertIsInstance(resolved_store, CustomStore)
def test_extend_accepts_a_store_class(self):
    """
    Check that ``extend`` accepts a store class directly, and that the
    default store is then backed by an instance of that class.
    """
    settings = {
        'default': 'my-driver',
        'stores': {'my-driver': {'driver': 'my-driver'}},
    }
    cache = CacheManager(settings)

    cache.extend('my-driver', CustomStore)

    resolved_store = cache.store().get_store()
    self.assertIsInstance(resolved_store, CustomStore)
def __init__(self, name, url, disable_cache=False):
    """
    Set up the caches and the HTTP session for a named repository.

    :param name: Repository name; also used as the cache sub-directory.
    :param url: Repository base URL; trailing slashes are removed.
    :param disable_cache: Stored as ``_disable_cache`` for later lookups.
    :raises ValueError: If ``name`` is the reserved name ``pypi``.
    """
    if name == "pypi":
        raise ValueError("The name [pypi] is reserved for repositories")

    self._packages = []
    self._name = name
    self._url = url.rstrip("/")
    self._cache_dir = Path(CACHE_DIR) / "cache" / "repositories" / name

    store_settings = {
        "releases": {"driver": "file", "path": str(self._cache_dir)},
        "packages": {"driver": "dict"},
        "matches": {"driver": "dict"},
    }
    self._cache = CacheManager(
        {"default": "releases", "serializer": "json", "stores": store_settings}
    )

    http_session = requests.session()
    http_cache = FileCache(str(self._cache_dir / "_http"))
    self._session = CacheControl(http_session, cache=http_cache)

    # Only attach the configured basic-auth value when the URL itself
    # does not already carry a username.
    embedded_username = urlparse.urlparse(self._url).username
    if not embedded_username:
        self._session.auth = get_http_basic_auth(self.name)

    self._disable_cache = disable_cache
def test_set_default_driver_changes_driver(self):
    """
    Check that ``set_default_driver`` changes which store ``store()``
    returns when called without a name.
    """
    file_store_path = os.path.join(tempfile.gettempdir(), 'cachy')
    settings = {
        'default': 'dict',
        'stores': {
            'dict': {'driver': 'dict'},
            'file': {'driver': 'file', 'path': file_store_path},
        },
    }
    cache = CacheManager(settings)

    store_before = cache.store().get_store()
    self.assertIsInstance(store_before, DictStore)

    cache.set_default_driver('file')

    store_after = cache.store().get_store()
    self.assertIsInstance(store_after, FileStore)
def test_store_get_the_correct_store(self):
    """
    Check that ``store()`` returns the default store when called without
    a name and the requested store when called with one.
    """
    file_store_path = os.path.join(tempfile.gettempdir(), 'cachy')
    settings = {
        'default': 'dict',
        'stores': {
            'dict': {'driver': 'dict'},
            'file': {'driver': 'file', 'path': file_store_path},
        },
    }
    cache = CacheManager(settings)

    default_store = cache.store().get_store()
    self.assertIsInstance(default_store, DictStore)

    named_dict_store = cache.store('dict').get_store()
    self.assertIsInstance(named_dict_store, DictStore)

    named_file_store = cache.store('file').get_store()
    self.assertIsInstance(named_file_store, FileStore)
def __init__(self, url="https://pypi.org/", disable_cache=False, fallback=True):
    """
    Set up the caches and the HTTP session for the PyPI repository.

    :param url: Base URL of the index.
    :param disable_cache: Stored as ``_disable_cache`` for later lookups.
    :param fallback: Stored as ``_fallback`` for later lookups.
    """
    self._name = "PyPI"
    self._url = url
    self._disable_cache = disable_cache
    self._fallback = fallback

    cache_root = Path(CACHE_DIR) / "cache" / "repositories" / "pypi"
    store_settings = {
        "releases": {"driver": "file", "path": str(cache_root)},
        "packages": {"driver": "dict"},
    }
    self._cache = CacheManager(
        {"default": "releases", "serializer": "json", "stores": store_settings}
    )

    http_session = session()
    http_cache = FileCache(str(cache_root / "_http"))
    self._session = CacheControl(http_session, cache=http_cache)

    super(PyPiRepository, self).__init__()
class PyPiRepository(Repository):
    """
    Repository that resolves packages through the JSON endpoints found
    under ``<url>pypi/``.

    Package listings are kept in the ``packages`` store (``dict`` driver)
    and per-release metadata in the ``releases`` store (``file`` driver),
    unless caching is disabled.
    """

    def __init__(self, url='https://pypi.org/', disable_cache=False):
        """
        :param url: Base URL of the index. Endpoint paths are appended to
            it verbatim.
        :param disable_cache: When true, lookups bypass the cache stores.
        """
        self._url = url
        self._disable_cache = disable_cache

        release_cache_dir = Path(CACHE_DIR) / 'cache' / 'repositories' / 'pypi'
        self._cache = CacheManager({
            'default': 'releases',
            'serializer': 'json',
            'stores': {
                'releases': {
                    'driver': 'file',
                    # Hand the store a plain string rather than a Path
                    # object so the configuration holds only simple values.
                    'path': str(release_cache_dir)
                },
                'packages': {
                    'driver': 'dict'
                }
            }
        })

        super().__init__()

    def find_packages(self,
                      name: str,
                      constraint: Union[Constraint, str, None] = None,
                      extras: Union[list, None] = None
                      ) -> List[Package]:
        """
        Find packages on the remote server.

        :param name: Name of the package to look up.
        :param constraint: Optional version constraint. Anything that is
            not already a ``BaseConstraint`` is parsed with
            ``VersionParser``.
        :param extras: Extras forwarded to :meth:`package`.
        :return: One fully populated package per matching release.
        """
        if constraint is not None and not isinstance(constraint, BaseConstraint):
            constraint = VersionParser().parse_constraints(constraint)

        info = self.get_package_info(name)

        # Select every matching version first, then build the packages,
        # so no release metadata is fetched before matching is complete.
        versions = [
            version
            for version in info['releases']
            if not constraint or constraint.matches(Constraint('=', version))
        ]

        return [
            self.package(name, version, extras=extras)
            for version in versions
        ]

    def package(self,
                name: str,
                version: str,
                extras: Union[list, None] = None) -> Package:
        """
        Return the package for ``name``/``version``, building it from the
        release metadata on first use and reusing it afterwards.

        :param name: Package name.
        :param version: Exact version string.
        :param extras: Names of the extras whose dependencies should be
            activated and added to the package requirements.
        """
        try:
            index = self._packages.index(Package(name, version, version))

            return self._packages[index]
        except ValueError:
            if extras is None:
                extras = []

            release_info = self.get_release_info(name, version)
            package = Package(name, version, version)

            requires_dist = release_info['requires_dist'] or []
            for req in requires_dist:
                try:
                    dependency = dependency_from_pep_508(req)
                except InvalidMarker:
                    # Invalid marker
                    # We strip the markers hoping for the best
                    req = req.split(';')[0]

                    dependency = dependency_from_pep_508(req)
                except ValueError:
                    # Likely unable to parse the constraint: skip this
                    # requirement instead of failing the whole package.
                    continue

                if dependency.extras:
                    for extra in dependency.extras:
                        if extra not in package.extras:
                            package.extras[extra] = []

                        package.extras[extra].append(dependency)

                if not dependency.is_optional():
                    package.requires.append(dependency)

            # Adding description
            package.description = release_info.get('summary', '')

            # Adding hashes information
            package.hashes = release_info['digests']

            # Activate extra dependencies
            for extra in extras:
                if extra in package.extras:
                    for dep in package.extras[extra]:
                        dep.activate()

                    package.requires += package.extras[extra]

            self._packages.append(package)

            return package

    def search(self, query, mode=0):
        """
        Search the index through its XML-RPC interface.

        :param query: Text matched against the package name, and against
            the summary too when ``mode`` is ``SEARCH_FULLTEXT``.
        :param mode: Search mode.
        :return: A list of dicts with ``name``, ``description`` and
            ``version`` keys.
        """
        search = {
            'name': query
        }

        if mode == self.SEARCH_FULLTEXT:
            search['summary'] = query

        client = ServerProxy(self._url)
        hits = client.search(search, 'or')

        return [
            {
                'name': hit['name'],
                'description': hit['summary'],
                'version': hit['version']
            }
            for hit in hits
        ]

    def get_package_info(self, name: str) -> dict:
        """
        Return the package information given its name.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        if self._disable_cache:
            return self._get_package_info(name)

        return self._cache.store('packages').remember_forever(
            f'{name}',
            lambda: self._get_package_info(name)
        )

    def _get_package_info(self, name: str) -> dict:
        """
        Fetch the package document from the remote server.

        :raises ValueError: If the server answers with a 404.
        """
        data = self._get(self._url + f'pypi/{name}/json')
        if data is None:
            raise ValueError(f'Package [{name}] not found.')

        return data

    def get_release_info(self, name: str, version: str) -> dict:
        """
        Return the release information given a package name and a version.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        if self._disable_cache:
            return self._get_release_info(name, version)

        return self._cache.remember_forever(
            f'{name}:{version}',
            lambda: self._get_release_info(name, version)
        )

    def _get_release_info(self, name: str, version: str) -> dict:
        """
        Fetch the release document and reduce it to the fields used by
        :meth:`package`.

        :raises ValueError: If the server answers with a 404.
        """
        json_data = self._get(self._url + f'pypi/{name}/{version}/json')
        if json_data is None:
            raise ValueError(f'Package [{name}] not found.')

        info = json_data['info']
        data = {
            'name': info['name'],
            'version': info['version'],
            'summary': info['summary'],
            'platform': info['platform'],
            'requires_dist': info['requires_dist'],
            'requires_python': info['requires_python'],
            'digests': []
        }

        # The release listing may have no entry for the requested version
        # string; in that case there are simply no digests to record.
        release_files = json_data.get('releases', {}).get(version, [])
        for file_info in release_files:
            data['digests'].append(file_info['digests']['sha256'])

        return data

    def _get(self, url: str) -> Union[dict, None]:
        """Return the decoded JSON body for ``url``, or ``None`` on a 404."""
        json_response = get(url)
        if json_response.status_code == 404:
            return None

        return json_response.json()
class PyPiRepository(Repository):
    """
    Repository that resolves packages through the JSON endpoints found
    under ``<url>pypi/``.

    Per-release metadata is kept in the ``releases`` store (``file``
    driver), package listings in the ``packages`` store (``dict`` driver),
    and JSON requests go through a ``CacheControl`` session. When
    ``fallback`` is enabled and a release declares no ``requires_dist``,
    its distribution archives are downloaded and inspected instead.
    """

    # Archive suffixes removed from an sdist file name to obtain the name
    # of the directory it unpacks to.
    _ARCHIVE_SUFFIXES = ('.tar.gz', '.tar.bz2', '.tgz', '.zip')

    def __init__(self,
                 url='https://pypi.org/',
                 disable_cache=False,
                 fallback=True):
        """
        :param url: Base URL of the index. Endpoint paths are appended to
            it verbatim.
        :param disable_cache: When true, lookups bypass the cache stores.
        :param fallback: When true, archives are inspected for releases
            that declare no dependencies.
        """
        self._name = 'PyPI'
        self._url = url
        self._disable_cache = disable_cache
        self._fallback = fallback

        release_cache_dir = Path(CACHE_DIR) / 'cache' / 'repositories' / 'pypi'
        self._cache = CacheManager({
            'default': 'releases',
            'serializer': 'json',
            'stores': {
                'releases': {
                    'driver': 'file',
                    'path': str(release_cache_dir)
                },
                'packages': {
                    'driver': 'dict'
                }
            }
        })

        self._session = CacheControl(
            session(),
            cache=FileCache(str(release_cache_dir / '_http'))
        )

        super(PyPiRepository, self).__init__()

    def find_packages(self,
                      name,  # type: str
                      constraint=None,  # type: Union[VersionConstraint, str, None]
                      extras=None,  # type: Union[list, None]
                      allow_prereleases=False  # type: bool
                      ):  # type: (...) -> List[Package]
        """
        Find packages on the remote server.

        :param name: Name of the package to look up.
        :param constraint: Version constraint; ``None`` means ``'*'``.
        :param extras: Extras recorded on each returned package.
        :param allow_prereleases: Whether pre-releases are eligible even
            when the constraint does not explicitly allow them.
        :return: One lightweight package per matching release.
        """
        if constraint is None:
            constraint = '*'

        if not isinstance(constraint, VersionConstraint):
            constraint = parse_constraint(constraint)

        info = self.get_package_info(name)

        packages = []
        for version, release in info['releases'].items():
            if not release:
                # Bad release
                self._log(
                    'No release information found for {}-{}, skipping'.format(
                        name, version),
                    level='debug')
                continue

            package = Package(name, version)

            if (package.is_prerelease()
                    and not allow_prereleases
                    and not constraint.allows(package.version)):
                continue

            if not constraint or constraint.allows(package.version):
                if extras is not None:
                    package.requires_extras = extras

                packages.append(package)

        self._log(
            '{} packages found for {} {}'.format(
                len(packages), name, str(constraint)),
            level='debug')

        return packages

    def package(self,
                name,  # type: str
                version,  # type: str
                extras=None  # type: (Union[list, None])
                ):  # type: (...) -> Union[Package, None]
        """
        Return the package for ``name``/``version``, building it from the
        release metadata on first use and reusing it afterwards.

        :param name: Package name.
        :param version: Exact version string.
        :param extras: Names of the extras whose dependencies should be
            activated and added to the package requirements.
        """
        try:
            index = self._packages.index(Package(name, version, version))

            return self._packages[index]
        except ValueError:
            if extras is None:
                extras = []

            release_info = self.get_release_info(name, version)
            package = Package(name, version, version)

            requires_dist = release_info['requires_dist'] or []
            for req in requires_dist:
                try:
                    dependency = dependency_from_pep_508(req)
                except InvalidMarker:
                    # Invalid marker
                    # We strip the markers hoping for the best
                    req = req.split(';')[0]

                    dependency = dependency_from_pep_508(req)
                except ValueError:
                    # Likely unable to parse constraint so we skip it
                    self._log(
                        'Invalid constraint ({}) found in {}-{} dependencies, '
                        'skipping'.format(req, package.name, package.version),
                        level='debug')
                    continue

                if dependency.extras:
                    for extra in dependency.extras:
                        if extra not in package.extras:
                            package.extras[extra] = []

                        package.extras[extra].append(dependency)

                if not dependency.is_optional():
                    package.requires.append(dependency)

            # Adding description
            package.description = release_info.get('summary', '')

            if release_info['requires_python']:
                package.python_versions = release_info['requires_python']

            if release_info['platform']:
                package.platform = release_info['platform']

            # Adding hashes information
            package.hashes = release_info['digests']

            # Activate extra dependencies
            for extra in extras:
                if extra in package.extras:
                    for dep in package.extras[extra]:
                        dep.activate()

                    package.requires += package.extras[extra]

            self._packages.append(package)

            return package

    def search(self, query, mode=0):
        """
        Search the index through its XML-RPC interface.

        :param query: Text matched against the package name, and against
            the summary too when ``mode`` is ``SEARCH_FULLTEXT``.
        :param mode: Search mode.
        :return: A list of ``Package`` objects with their description set.
        """
        results = []

        search = {'name': query}

        if mode == self.SEARCH_FULLTEXT:
            search['summary'] = query

        client = ServerProxy('https://pypi.python.org/pypi')
        hits = client.search(search, 'or')

        for hit in hits:
            result = Package(hit['name'], hit['version'], hit['version'])
            result.description = to_str(hit['summary'])
            results.append(result)

        return results

    def get_package_info(self, name):  # type: (str) -> dict
        """
        Return the package information given its name.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        if self._disable_cache:
            return self._get_package_info(name)

        return self._cache.store('packages').remember_forever(
            name, lambda: self._get_package_info(name))

    def _get_package_info(self, name):  # type: (str) -> dict
        """
        Fetch the package document from the remote server.

        :raises ValueError: If the server answers with a 404.
        """
        data = self._get('pypi/{}/json'.format(name))
        if data is None:
            raise ValueError('Package [{}] not found.'.format(name))

        return data

    def get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Return the release information given a package name and a version.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        if self._disable_cache:
            return self._get_release_info(name, version)

        return self._cache.remember_forever(
            '{}:{}'.format(name, version),
            lambda: self._get_release_info(name, version))

    def _get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Fetch the release document and reduce it to the fields used by
        :meth:`package`, inspecting the archives when fallback is enabled
        and no dependencies are declared.

        :raises ValueError: If the server answers with a 404.
        """
        self._log(
            'Getting info for {} ({}) from PyPI'.format(name, version),
            'debug')

        json_data = self._get('pypi/{}/{}/json'.format(name, version))
        if json_data is None:
            raise ValueError('Package [{}] not found.'.format(name))

        info = json_data['info']
        data = {
            'name': info['name'],
            'version': info['version'],
            'summary': info['summary'],
            'platform': info['platform'],
            'requires_dist': info['requires_dist'],
            'requires_python': info['requires_python'],
            'digests': [],
            '_fallback': False
        }

        try:
            version_info = json_data['releases'][version]
        except KeyError:
            version_info = []

        for file_info in version_info:
            data['digests'].append(file_info['digests']['sha256'])

        if self._fallback and data['requires_dist'] is None:
            self._log(
                'No dependencies found, downloading archives',
                level='debug')
            # No dependencies set (along with other information)
            # This might be due to actually no dependencies
            # or badly set metadata when uploading
            # So, we need to make sure there is actually no
            # dependencies by introspecting packages
            urls = self._select_distribution_urls(json_data['urls'])
            if not urls:
                return data

            info = self._get_info_from_urls(urls)

            data['requires_dist'] = info['requires_dist']

            if not data['requires_python']:
                data['requires_python'] = info['requires_python']

        return data

    def _select_distribution_urls(self, url_infos):
        # type: (List[dict]) -> Dict[str, str]
        """
        Pick at most one sdist and one universal wheel from the release
        file entries, keyed by ``'sdist'`` and ``'bdist_wheel'``.
        """
        urls = {}
        for url in url_infos:
            # Only get sdist and universal wheels
            dist_type = url['packagetype']
            if dist_type not in ('sdist', 'bdist_wheel'):
                continue

            # Keep the first candidate of each kind
            if dist_type in urls:
                continue

            if dist_type == 'sdist':
                urls['sdist'] = url['url']
                continue

            # If bdist_wheel, check if it's universal
            if url['python_version'] not in ('py2.py3', 'py3', 'py2'):
                continue

            filename = os.path.basename(urlparse.urlparse(url['url']).path)
            if '-none-any' not in filename:
                continue

            # Record the wheel: without this the wheel branch of
            # _get_info_from_urls could never be taken.
            urls['bdist_wheel'] = url['url']

        return urls

    def _get(self, endpoint):  # type: (str) -> Union[dict, None]
        """Return the decoded JSON body for ``endpoint``, or ``None`` on a 404."""
        json_response = self._session.get(self._url + endpoint)
        if json_response.status_code == 404:
            return None

        return json_response.json()

    def _get_info_from_urls(self, urls):
        # type: (Dict[str, str]) -> Dict[str, Union[str, List, None]]
        """Inspect the wheel when one was selected, otherwise the sdist."""
        if 'bdist_wheel' in urls:
            return self._get_info_from_wheel(urls['bdist_wheel'])

        return self._get_info_from_sdist(urls['sdist'])

    def _get_info_from_wheel(self, url):
        # type: (str) -> Dict[str, Union[str, List, None]]
        """Download the wheel at ``url`` and read its metadata."""
        info = {
            'summary': '',
            'requires_python': None,
            'requires_dist': None,
        }

        filename = os.path.basename(urlparse.urlparse(url).path)

        with temporary_directory() as temp_dir:
            filepath = os.path.join(temp_dir, filename)
            self._download(url, filepath)

            try:
                meta = pkginfo.Wheel(filepath)
            except ValueError:
                # Unable to determine dependencies
                # Assume none
                return info

        if meta.summary:
            info['summary'] = meta.summary

        info['requires_python'] = meta.requires_python

        if meta.requires_dist:
            info['requires_dist'] = meta.requires_dist

        return info

    def _get_info_from_sdist(self, url):
        # type: (str) -> Dict[str, Union[str, List, None]]
        """
        Download the sdist at ``url`` and read its metadata, falling back
        to the ``requires.txt`` of a bundled ``.egg-info`` directory.
        """
        info = {
            'summary': '',
            'requires_python': None,
            'requires_dist': None,
        }

        filename = os.path.basename(urlparse.urlparse(url).path)

        with temporary_directory() as temp_dir:
            filepath = Path(temp_dir) / filename
            self._download(url, str(filepath))

            try:
                meta = pkginfo.SDist(str(filepath))
                if meta.summary:
                    info['summary'] = meta.summary

                if meta.requires_python:
                    info['requires_python'] = meta.requires_python

                if meta.requires_dist:
                    info['requires_dist'] = list(meta.requires_dist)

                    return info
            except ValueError:
                # Unable to determine dependencies
                # We pass and go deeper
                pass

            # Still no dependencies found
            # So, we unpack and introspect
            unpacked = Path(temp_dir) / 'unpacked'
            self._unpack_archive(filepath, str(unpacked))

            sdist_dir = unpacked / self._strip_archive_suffix(filename)

            # Look for an .egg-info at the root first, then anywhere below
            for pattern in ('*.egg-info', '**/*.egg-info'):
                eggs = list(sdist_dir.glob(pattern))
                if not eggs:
                    continue

                requires = eggs[0] / 'requires.txt'
                if requires.exists():
                    with requires.open() as f:
                        info['requires_dist'] = parse_requires(f.read())

                    return info

        # Still nothing, assume no dependencies
        # We could probably get them by executing
        # python setup.py egg-info but I don't feel
        # comfortable executing a file just for the sake
        # of getting dependencies.
        return info

    @classmethod
    def _strip_archive_suffix(cls, filename):  # type: (str) -> str
        """
        Return ``filename`` without its archive extension.

        ``str.rstrip`` removes any trailing run of the given characters,
        not a suffix, so it cannot be used for this.
        """
        for suffix in cls._ARCHIVE_SUFFIXES:
            if filename.endswith(suffix):
                return filename[:-len(suffix)]

        return filename

    @staticmethod
    def _unpack_archive(filepath, destination):  # type: (Path, str) -> None
        """
        Extract the zip, bzip2 or gzip archive at ``filepath`` into
        ``destination``, closing every handle even when extraction fails.
        """
        # NOTE(review): extractall() on a downloaded archive trusts the
        # member paths it contains; consider validating them first.
        if filepath.suffix == '.zip':
            with zipfile.ZipFile(str(filepath)) as archive:
                archive.extractall(destination)

            return

        opener = BZ2File if filepath.suffix == '.bz2' else GzipFile
        with opener(str(filepath)) as stream:
            with tarfile.TarFile(str(filepath), fileobj=stream) as archive:
                archive.extractall(destination)

    def _inspect_sdist_with_setup(self, sdist_dir):
        """
        Run ``setup.py egg_info`` for the unpacked sdist in a fresh
        virtualenv and read the generated metadata.

        Any failure is treated as "nothing found": the returned values
        then stay ``None``.
        """
        info = {
            'requires_python': None,
            'requires_dist': None,
        }

        setup = sdist_dir / 'setup.py'
        if not setup.exists():
            return info

        venv = Venv.create(NullIO())

        current_dir = os.getcwd()
        os.chdir(sdist_dir.as_posix())

        try:
            venv.run('python', 'setup.py', 'egg_info')

            egg_info = list(sdist_dir.glob('**/*.egg-info'))[0]

            meta = pkginfo.UnpackedSDist(str(egg_info))
            if meta.requires_python:
                info['requires_python'] = meta.requires_python

            if meta.requires_dist:
                info['requires_dist'] = list(meta.requires_dist)
            else:
                requires = egg_info / 'requires.txt'
                if requires.exists():
                    with requires.open() as f:
                        info['requires_dist'] = parse_requires(f.read())
        except Exception as e:
            # Best effort only: report the failure and keep what we have
            self._log(
                'Unable to inspect {}: {}'.format(sdist_dir, e),
                level='debug')
        finally:
            # Always restore the working directory, whatever happened
            os.chdir(current_dir)

        return info

    def _download(self, url, dest):  # type: (str, str) -> None
        """Stream the content at ``url`` into the file ``dest``."""
        r = get(url, stream=True)
        with open(dest, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

    def _log(self, msg, level='info'):
        """Log ``msg`` at ``level``, prefixed with the repository name."""
        getattr(logger, level)('{}: {}'.format(self._name, msg))
class Configuration(Mapping):
    """
    Read-only mapping view over the merged configuration.

    The values start from ``DEFAULTS`` and are overridden by the ``adr``
    table of the TOML file at ``path`` when that file exists. Once
    construction is complete the instance is locked: attributes can no
    longer be assigned directly, use :meth:`set` or :meth:`merge`.
    """

    DEFAULT_CONFIG_PATH = Path(user_config_dir("adr")) / "config.toml"
    DEFAULTS = {
        "cache": {"retention": 1440},  # minutes
        "debug": False,
        "debug_url": "https://activedata.allizom.org/tools/query.html#query_id={}",
        "fmt": "table",
        "sources": [os.getcwd(), Path(adr.__file__).parent.parent.as_posix()],
        "url": "https://activedata.allizom.org/query",
        "verbose": False,
    }
    # Class-level default so that __setattr__ can be used during __init__.
    locked = False

    def __init__(self, path=None):
        """
        Args:
            path: Location of the configuration file. Falls back to the
                ``ADR_CONFIG_PATH`` environment variable, then to
                ``DEFAULT_CONFIG_PATH``.
        """
        from copy import deepcopy

        self.path = Path(
            path or os.environ.get("ADR_CONFIG_PATH") or self.DEFAULT_CONFIG_PATH
        )

        # Deep copy: a shallow copy would share the nested "cache" dict and
        # "sources" list with DEFAULTS, so the merge and setdefault calls
        # below would leak this instance's settings into every later one.
        self._config = deepcopy(self.DEFAULTS)
        if self.path.is_file():
            with open(self.path, "r") as fh:
                content = fh.read()
            self.merge(parse(content)["adr"])

        self._config["sources"] = sorted(
            map(os.path.expanduser, set(self._config["sources"]))
        )

        # Use the NullStore by default. This allows us to control whether
        # caching is enabled or not at runtime.
        self._config["cache"].setdefault("stores", {"null": {"driver": "null"}})
        self.cache = CacheManager(self._config["cache"])
        self.cache.extend("null", lambda driver: NullStore())
        self.locked = True

    def __len__(self):
        return len(self._config)

    def __iter__(self):
        return iter(self._config)

    def __getitem__(self, key):
        return self._config[key]

    def __getattr__(self, key):
        """
        Expose configuration keys as attributes.

        Only called when normal attribute lookup fails. An unknown key
        raises ``KeyError`` (from ``__getitem__``).
        """
        if key in vars(self):
            return vars(self)[key]
        return self.__getitem__(key)

    def __setattr__(self, key, value):
        """Reject direct attribute assignment once the instance is locked."""
        if self.locked:
            raise AttributeError(
                "Don't set attributes directly, use `config.set(key=value)` instead."
            )
        super(Configuration, self).__setattr__(key, value)

    def set(self, **kwargs):
        """Set data on the config object."""
        self._config.update(kwargs)

    def merge(self, other):
        """Merge data into config (updates dicts and lists instead of
        overwriting them).

        Args:
            other (dict): Dictionary to merge configuration with.
        """
        merge_to(other, self._config)

    def dump(self):
        """Return the flattened configuration as newline-separated text."""
        return "\n".join(flatten(self._config))
class PyPiRepository(Repository):
    """
    Repository that resolves packages through the JSON endpoints found
    under ``<url>pypi/``.

    Per-release metadata is kept in the ``releases`` store (``file``
    driver), package listings in the ``packages`` store (``dict`` driver),
    and requests go through a ``CacheControl`` session.
    """

    def __init__(self,
                 url='https://pypi.org/',
                 disable_cache=False,
                 fallback=False):
        """
        :param url: Base URL of the index. Endpoint paths are appended to
            it verbatim.
        :param disable_cache: When true, lookups bypass the cache stores.
        :param fallback: When true, :meth:`package` returns ``None`` for
            releases that expose no dependency metadata at all.
        """
        self._url = url
        self._disable_cache = disable_cache
        self._fallback = fallback

        cache_root = Path(CACHE_DIR) / 'cache' / 'repositories' / 'pypi'
        store_settings = {
            'releases': {'driver': 'file', 'path': str(cache_root)},
            'packages': {'driver': 'dict'},
        }
        self._cache = CacheManager({
            'default': 'releases',
            'serializer': 'json',
            'stores': store_settings,
        })

        http_session = session()
        http_cache = FileCache(str(cache_root / '_http'))
        self._session = CacheControl(http_session, cache=http_cache)

        super(PyPiRepository, self).__init__()

    def find_packages(self,
                      name,  # type: str
                      constraint=None,  # type: Union[Constraint, str, None]
                      extras=None  # type: Union[list, None]
                      ):  # type: (...) -> List[Package]
        """
        Find packages on the remote server.

        Returns one bare ``Package`` for every release whose version
        satisfies ``constraint`` (all releases when there is none).
        """
        already_parsed = constraint is None or isinstance(constraint, BaseConstraint)
        if not already_parsed:
            constraint = VersionParser().parse_constraints(constraint)

        releases = self.get_package_info(name)['releases']

        matching_versions = [
            version
            for version in releases
            if not constraint or constraint.matches(Constraint('=', version))
        ]

        return [Package(name, version, version) for version in matching_versions]

    def package(self,
                name,  # type: str
                version,  # type: str
                extras=None  # type: (Union[list, None])
                ):  # type: (...) -> Union[Package, None]
        """
        Return the package for ``name``/``version``, reusing a previously
        built one when available and building it otherwise.

        ``None`` is returned when fallback is enabled and the release
        exposes no dependency metadata.
        """
        lookup = Package(name, version, version)
        try:
            return self._packages[self._packages.index(lookup)]
        except ValueError:
            # Not built yet
            return self._build_package(name, version, extras)

    def _build_package(self, name, version, extras):
        """Build, register and return the package from its release metadata."""
        wanted_extras = [] if extras is None else extras

        release_info = self.get_release_info(name, version)

        if (self._fallback
                and release_info['requires_dist'] is None
                and not release_info['requires_python']
                and not release_info['platform']):
            # Nothing usable was published for this release, either because
            # it really has no dependencies or because its metadata was
            # badly set on upload. Returning None lets the fallback
            # repository pick up more accurate information.
            return None

        package = Package(name, version, version)

        for req in release_info['requires_dist'] or []:
            try:
                dependency = dependency_from_pep_508(req)
            except InvalidMarker:
                # The marker is invalid: retry without it, hoping for the best
                req = req.split(';')[0]
                dependency = dependency_from_pep_508(req)
            except ValueError:
                # The constraint is most likely unparsable: ignore it
                continue

            for extra in dependency.extras or ():
                package.extras.setdefault(extra, []).append(dependency)

            if not dependency.is_optional():
                package.requires.append(dependency)

        # Description and hashes
        package.description = release_info.get('summary', '')
        package.hashes = release_info['digests']

        # Activate the dependencies of every requested extra
        for extra in wanted_extras:
            if extra not in package.extras:
                continue

            extra_dependencies = package.extras[extra]
            for dep in extra_dependencies:
                dep.activate()

            package.requires += extra_dependencies

        self._packages.append(package)

        return package

    def search(self, query, mode=0):
        """
        Search the index through its XML-RPC interface and return the
        hits as ``Package`` objects with their description set.
        """
        criteria = {'name': query}
        if mode == self.SEARCH_FULLTEXT:
            criteria['summary'] = query

        client = ServerProxy('https://pypi.python.org/pypi')

        found = []
        for hit in client.search(criteria, 'or'):
            match = Package(hit['name'], hit['version'], hit['version'])
            match.description = hit['summary']
            found.append(match)

        return found

    def get_package_info(self, name):  # type: (str) -> dict
        """
        Return the package information for ``name``.

        Served from the ``packages`` store when caching is enabled,
        fetched from the remote server otherwise.
        """
        if not self._disable_cache:
            packages_store = self._cache.store('packages')

            return packages_store.remember_forever(
                name, lambda: self._get_package_info(name))

        return self._get_package_info(name)

    def _get_package_info(self, name):  # type: (str) -> dict
        """Fetch the package document; a 404 is reported as ``ValueError``."""
        document = self._get('pypi/{}/json'.format(name))
        if document is None:
            raise ValueError('Package [{}] not found.'.format(name))

        return document

    def get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Return the release information for ``name`` at ``version``.

        Served from the default cache store when caching is enabled,
        fetched from the remote server otherwise.
        """
        if not self._disable_cache:
            cache_key = '{}:{}'.format(name, version)

            return self._cache.remember_forever(
                cache_key, lambda: self._get_release_info(name, version))

        return self._get_release_info(name, version)

    def _get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Fetch the release document and keep only the fields used to build
        a package; a 404 is reported as ``ValueError``.
        """
        document = self._get('pypi/{}/{}/json'.format(name, version))
        if document is None:
            raise ValueError('Package [{}] not found.'.format(name))

        info = document['info']
        data = {
            'name': info['name'],
            'version': info['version'],
            'summary': info['summary'],
            'platform': info['platform'],
            'requires_dist': info['requires_dist'],
            'requires_python': info['requires_python'],
            'digests': []
        }

        try:
            release_files = document['releases'][version]
        except KeyError:
            release_files = []

        data['digests'].extend(
            file_info['digests']['sha256'] for file_info in release_files)

        return data

    def _get(self, endpoint):  # type: (str) -> Union[dict, None]
        """Return the decoded JSON body for ``endpoint``, or ``None`` on a 404."""
        response = self._session.get(self._url + endpoint)

        return None if response.status_code == 404 else response.json()
class LegacyRepository(PyPiRepository):
    """
    Repository that reads per-package link pages instead of the JSON
    endpoints used by its parent class.

    Matching versions are kept in the ``matches`` store for a short time,
    and release metadata is obtained by inspecting the distribution
    archives linked from the package page.
    """

    def __init__(self, name, url, disable_cache=False):
        """
        Set up the caches and the HTTP session.

        The parent initialiser is not called; the attributes this class
        relies on are assigned here.

        :param name: Repository name; also used as the cache sub-directory.
        :param url: Repository base URL; trailing slashes are removed.
        :param disable_cache: Stored as ``_disable_cache``.
        :raises ValueError: If ``name`` is the reserved name ``pypi``.
        """
        if name == "pypi":
            raise ValueError("The name [pypi] is reserved for repositories")

        self._packages = []
        self._name = name
        self._url = url.rstrip("/")
        self._cache_dir = Path(CACHE_DIR) / "cache" / "repositories" / name

        self._cache = CacheManager({
            "default": "releases",
            "serializer": "json",
            "stores": {
                "releases": {"driver": "file", "path": str(self._cache_dir)},
                "packages": {"driver": "dict"},
                "matches": {"driver": "dict"},
            },
        })

        self._session = CacheControl(
            requests.session(),
            cache=FileCache(str(self._cache_dir / "_http")))

        self._disable_cache = disable_cache

    @property
    def name(self):
        """The repository name given at construction."""
        return self._name

    def find_packages(self,
                      name,
                      constraint=None,
                      extras=None,
                      allow_prereleases=False):
        """
        Return one lightweight package per version listed on the package
        page that satisfies ``constraint``.

        :param name: Name of the package to look up.
        :param constraint: Optional version constraint, parsed when it is
            not already a ``VersionConstraint``.
        :param extras: Extras recorded on each returned package.
        :param allow_prereleases: Accepted for interface compatibility;
            not used by this implementation.
        :return: The matching packages, or an empty list when the package
            page does not exist.
        """
        packages = []

        if constraint is not None and not isinstance(constraint, VersionConstraint):
            constraint = parse_constraint(constraint)

        key = name
        if constraint:
            key = "{}:{}".format(key, str(constraint))

        matches = self._cache.store("matches")
        if matches.has(key):
            versions = matches.get(key)
        else:
            page = self._get("/{}".format(
                canonicalize_name(name).replace(".", "-")))
            if page is None:
                return []

            versions = [
                version
                for version in page.versions
                if not constraint or constraint.allows(version)
            ]

            matches.put(key, versions, 5)

        for version in versions:
            package = Package(name, version)
            package.source_type = "legacy"
            package.source_url = self._url

            if extras is not None:
                package.requires_extras = extras

            packages.append(package)

        self._log(
            "{} packages found for {} {}".format(
                len(packages), name, str(constraint)),
            level="debug",
        )

        return packages

    def package(self, name, version, extras=None):
        # type: (...) -> poetry.packages.Package
        """
        Retrieve the release information.

        This is a heavy task which takes time.
        We have to download a package to get the dependencies.
        We also need to download every file matching this release
        to get the various hashes.

        Note that, this will be cached so the subsequent operations
        should be much faster.
        """
        try:
            index = self._packages.index(
                poetry.packages.Package(name, version, version))

            return self._packages[index]
        except ValueError:
            if extras is None:
                extras = []

            release_info = self.get_release_info(name, version)

            package = poetry.packages.Package(name, version, version)
            package.source_type = "legacy"
            package.source_url = self._url

            requires_dist = release_info["requires_dist"] or []
            for req in requires_dist:
                try:
                    dependency = dependency_from_pep_508(req)
                except InvalidMarker:
                    # Invalid marker
                    # We strip the markers hoping for the best
                    req = req.split(";")[0]

                    dependency = dependency_from_pep_508(req)
                except ValueError:
                    # Likely unable to parse the constraint: skip this
                    # requirement instead of failing the whole package.
                    self._log(
                        "Invalid constraint ({}) found in {}-{} dependencies, "
                        "skipping".format(req, package.name, package.version),
                        level="debug",
                    )
                    continue

                if dependency.extras:
                    for extra in dependency.extras:
                        if extra not in package.extras:
                            package.extras[extra] = []

                        package.extras[extra].append(dependency)

                if not dependency.is_optional():
                    package.requires.append(dependency)

            # Adding description
            package.description = release_info.get("summary", "")

            # Adding hashes information
            package.hashes = release_info["digests"]

            # Activate extra dependencies
            for extra in extras:
                if extra in package.extras:
                    for dep in package.extras[extra]:
                        dep.activate()

                    package.requires += package.extras[extra]

            self._packages.append(package)

            return package

    def _get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Build the release information from the links published for
        ``version`` on the package page.

        :raises ValueError: If the package page does not exist, or if it
            lists no link for ``version``.
        """
        page = self._get("/{}".format(
            canonicalize_name(name).replace(".", "-")))
        if page is None:
            raise ValueError('No package named "{}"'.format(name))

        data = {
            "name": name,
            "version": version,
            "summary": "",
            "requires_dist": [],
            "requires_python": [],
            "digests": [],
        }

        links = list(page.links_for_version(Version.parse(version)))
        if not links:
            # Report the missing release explicitly rather than failing
            # later on with an unrelated indexing error.
            raise ValueError(
                'No distribution found for "{}" ({})'.format(name, version))

        urls = {}
        hashes = []
        for link in links:
            if link.is_wheel:
                urls["bdist_wheel"] = link.url
            elif link.filename.endswith(".tar.gz"):
                urls["sdist"] = link.url
            elif (link.filename.endswith((".zip", ".bz2"))
                    and "sdist" not in urls):
                urls["sdist"] = link.url

            if link.hash_name == "sha256":
                hashes.append(link.hash)

        data["digests"] = hashes

        if not urls:
            # Every link was already classified by the loop above, so an
            # empty mapping means none of them is a wheel or a supported
            # source archive: there is nothing to inspect.
            return data

        info = self._get_info_from_urls(urls)

        data["summary"] = info["summary"]
        data["requires_dist"] = info["requires_dist"]
        data["requires_python"] = info["requires_python"]

        return data

    def _get(self, endpoint):  # type: (str) -> Union[Page, None]
        """Return the ``Page`` for ``endpoint``, or ``None`` on a 404."""
        url = self._url + endpoint
        response = self._session.get(url)
        if response.status_code == 404:
            return None

        return Page(url, response.content, response.headers)
def handle(self) -> int:
    """
    Clear entries from a repository cache.

    The ``cache`` argument has the form ``<repository>`` (requires the
    ``--all`` option and flushes the whole cache) or
    ``<repository>:<package>:<version>`` (forgets a single entry).

    Returns:
        ``0`` when the command completes, including when there is nothing
        to delete or the user declines the confirmation.

    Raises:
        ValueError: If the repository name does not designate a directory
            inside the repository cache directory, or if the key has more
            than three parts.
        RuntimeError: If a whole cache is targeted without ``--all``, or
            if only a package name is given.
    """
    from cachy import CacheManager

    from poetry.locations import REPOSITORY_CACHE_DIR

    cache_key = self.argument("cache")

    parts = cache_key.split(":")
    root = parts[0]
    cache_dir = REPOSITORY_CACHE_DIR / root

    try:
        relative_dir = cache_dir.relative_to(REPOSITORY_CACHE_DIR)
    except ValueError:
        raise ValueError("{} is not a valid repository cache".format(root))

    # relative_to() compares paths textually, so ".." segments pass the
    # check above while still pointing outside the cache directory.
    if ".." in relative_dir.parts:
        raise ValueError("{} is not a valid repository cache".format(root))

    cache = CacheManager({
        "default": root,
        "serializer": "json",
        "stores": {root: {"driver": "file", "path": str(cache_dir)}},
    })

    if len(parts) == 1:
        if not self.option("all"):
            raise RuntimeError(
                "Add the --all option if you want to clear all "
                "{} caches".format(root))

        if not os.path.exists(str(cache_dir)):
            self.line("No cache entries for {}".format(root))
            return 0

        # Calculate number of entries
        entries_count = sum(
            len(files) for _path, _dirs, files in os.walk(str(cache_dir)))

        delete = self.confirm(
            "<question>Delete {} entries?</>".format(entries_count))
        if not delete:
            return 0

        cache.flush()

        return 0

    if len(parts) == 2:
        raise RuntimeError(
            "Only specifying the package name is not yet supported. "
            "Add a specific version to clear")

    if len(parts) == 3:
        entry_key = "{}:{}".format(parts[1], parts[2])

        if not cache.has(entry_key):
            self.line("No cache entries for {}".format(entry_key))
            return 0

        delete = self.confirm("Delete cache entry {}".format(entry_key))
        if not delete:
            return 0

        cache.forget(entry_key)

        return 0

    raise ValueError("Invalid cache key")
class PyPiRepository(Repository):
    """
    Repository that reads package and release data from the PyPI JSON API.

    Release information is cached in a file store (``releases``) and package
    information in an in-memory store (``packages``). When ``fallback`` is
    enabled and the JSON API reports neither ``requires_dist`` nor
    ``requires_python``, the distribution archives are downloaded and
    inspected to recover the dependencies.
    """

    def __init__(self,
                 url='https://pypi.org/',
                 disable_cache=False,
                 fallback=True):
        self._url = url
        self._disable_cache = disable_cache
        self._fallback = fallback

        release_cache_dir = Path(CACHE_DIR) / 'cache' / 'repositories' / 'pypi'
        self._cache = CacheManager({
            'default': 'releases',
            'serializer': 'json',
            'stores': {
                'releases': {
                    'driver': 'file',
                    'path': str(release_cache_dir)
                },
                'packages': {
                    'driver': 'dict'
                }
            }
        })

        # HTTP responses are cached on disk next to the release cache.
        self._session = CacheControl(
            session(),
            cache=FileCache(str(release_cache_dir / '_http'))
        )

        super(PyPiRepository, self).__init__()

    def find_packages(self,
                      name,             # type: str
                      constraint=None,  # type: Union[Constraint, str, None]
                      extras=None       # type: Union[list, None]
                      ):  # type: (...) -> List[Package]
        """
        Find packages on the remote server.

        Every version listed under ``releases`` that has at least one file
        and matches ``constraint`` (when given) yields a ``Package``.
        ``extras`` is accepted but not used here.
        """
        if constraint is not None and not isinstance(constraint, BaseConstraint):
            version_parser = VersionParser()
            constraint = version_parser.parse_constraints(constraint)

        info = self.get_package_info(name)

        versions = []
        for version, release in info['releases'].items():
            if not release:
                # Bad release
                continue

            if not constraint or constraint.matches(Constraint('=', version)):
                versions.append(version)

        return [Package(name, version) for version in versions]

    def package(self,
                name,        # type: str
                version,     # type: str
                extras=None  # type: (Union[list, None])
                ):  # type: (...) -> Union[Package, None]
        """
        Return the ``Package`` for ``name``/``version``.

        A package already present in ``self._packages`` is returned as is;
        otherwise it is built from the release information, its requested
        ``extras`` are activated, and it is appended to ``self._packages``.
        """
        try:
            index = self._packages.index(Package(name, version, version))

            return self._packages[index]
        except ValueError:
            if extras is None:
                extras = []

            release_info = self.get_release_info(name, version)
            if (
                self._fallback
                and release_info['requires_dist'] is None
                and not release_info['requires_python']
                and '_fallback' not in release_info
            ):
                # The cached entry has no '_fallback' key, so the archive
                # inspection never ran for it: force a cache update.
                self._cache.forget('{}:{}'.format(name, version))
                release_info = self.get_release_info(name, version)

            package = Package(name, version, version)
            requires_dist = release_info['requires_dist'] or []
            for req in requires_dist:
                try:
                    dependency = dependency_from_pep_508(req)
                except InvalidMarker:
                    # Invalid marker
                    # We strip the markers hoping for the best
                    req = req.split(';')[0]

                    dependency = dependency_from_pep_508(req)
                except ValueError:
                    # Likely unable to parse constraint so we skip it
                    continue

                if dependency.extras:
                    for extra in dependency.extras:
                        if extra not in package.extras:
                            package.extras[extra] = []

                        package.extras[extra].append(dependency)

                if not dependency.is_optional():
                    package.requires.append(dependency)

            # Adding description
            package.description = release_info.get('summary', '')

            if release_info['requires_python']:
                package.python_versions = release_info['requires_python']

            if release_info['platform']:
                package.platform = release_info['platform']

            # Adding hashes information
            package.hashes = release_info['digests']

            # Activate extra dependencies
            for extra in extras:
                if extra in package.extras:
                    for dep in package.extras[extra]:
                        dep.activate()

                    package.requires += package.extras[extra]

            self._packages.append(package)

            return package

    def search(self, query, mode=0):
        """
        Search packages by name, and by summary too in full-text mode.

        The XML-RPC endpoint is the fixed ``https://pypi.python.org/pypi``
        URL; ``self._url`` is not used here.
        """
        results = []

        search = {
            'name': query
        }

        if mode == self.SEARCH_FULLTEXT:
            search['summary'] = query

        client = ServerProxy('https://pypi.python.org/pypi')
        hits = client.search(search, 'or')

        for hit in hits:
            result = Package(hit['name'], hit['version'], hit['version'])
            result.description = to_str(hit['summary'])
            results.append(result)

        return results

    def get_package_info(self, name):  # type: (str) -> dict
        """
        Return the package information given its name.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        if self._disable_cache:
            return self._get_package_info(name)

        return self._cache.store('packages').remember_forever(
            name,
            lambda: self._get_package_info(name)
        )

    def _get_package_info(self, name):  # type: (str) -> dict
        """Fetch the package JSON document; raise ValueError on a 404."""
        data = self._get('pypi/{}/json'.format(name))
        if data is None:
            raise ValueError('Package [{}] not found.'.format(name))

        return data

    def get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Return the release information given a package name and a version.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        if self._disable_cache:
            return self._get_release_info(name, version)

        return self._cache.remember_forever(
            '{}:{}'.format(name, version),
            lambda: self._get_release_info(name, version)
        )

    def _get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Build the release information dict from the release JSON document.

        When fallback is enabled and the document carries neither
        ``requires_dist`` nor ``requires_python``, ``_fallback`` is set to
        True and the dependencies are read from a universal wheel if one is
        listed, otherwise from the first sdist.
        """
        json_data = self._get('pypi/{}/{}/json'.format(name, version))
        if json_data is None:
            raise ValueError('Package [{}] not found.'.format(name))

        info = json_data['info']
        data = {
            'name': info['name'],
            'version': info['version'],
            'summary': info['summary'],
            'platform': info['platform'],
            'requires_dist': info['requires_dist'],
            'requires_python': info['requires_python'],
            'digests': [],
            '_fallback': False
        }

        try:
            version_info = json_data['releases'][version]
        except KeyError:
            version_info = []

        for file_info in version_info:
            data['digests'].append(file_info['digests']['sha256'])

        if (
            self._fallback
            and data['requires_dist'] is None
            and not data['requires_python']
        ):
            # No dependencies set (along with other information)
            # This might be due to actually no dependencies
            # or badly set metadata when uploading
            # So, we need to make sure there is actually no
            # dependencies by introspecting packages
            data['_fallback'] = True

            urls = {}
            for url in json_data['urls']:
                # Only get sdist and universal wheels
                dist_type = url['packagetype']
                if dist_type not in ['sdist', 'bdist_wheel']:
                    continue

                if dist_type == 'sdist':
                    # Keep the first sdist only (the guard used to test the
                    # key 'dist', which never exists).
                    if 'sdist' not in urls:
                        urls['sdist'] = url['url']

                    continue

                if 'bdist_wheel' in urls:
                    continue

                # If bdist_wheel, check if it's universal
                python_version = url['python_version']
                if python_version not in ['py2.py3', 'py3', 'py2']:
                    continue

                parts = urlparse.urlparse(url['url'])
                filename = os.path.basename(parts.path)

                if '-none-any' not in filename:
                    continue

                # Record the universal wheel; previously it was detected
                # but never stored, so wheels were never inspected.
                urls['bdist_wheel'] = url['url']

            if not urls:
                return data

            requires_dist = self._get_requires_dist_from_urls(urls)

            data['requires_dist'] = requires_dist

        return data

    def _get(self, endpoint):  # type: (str) -> Union[dict, None]
        """GET ``self._url + endpoint``; return decoded JSON or None on 404."""
        json_response = self._session.get(self._url + endpoint)
        if json_response.status_code == 404:
            return None

        json_data = json_response.json()

        return json_data

    def _get_requires_dist_from_urls(self,
                                     urls
                                     ):  # type: (dict) -> Union[list, None]
        """Read dependencies from the wheel when available, else the sdist."""
        if 'bdist_wheel' in urls:
            # The key was misspelled 'bdist_wheek', raising KeyError.
            return self._get_requires_dist_from_wheel(urls['bdist_wheel'])

        return self._get_requires_dist_from_sdist(urls['sdist'])

    def _get_requires_dist_from_wheel(self,
                                      url
                                      ):  # type: (str) -> Union[list, None]
        """Download the wheel and return its ``requires_dist``, if any."""
        filename = os.path.basename(urlparse.urlparse(url).path)

        with temporary_directory() as temp_dir:
            filepath = os.path.join(temp_dir, filename)
            self._download(url, filepath)

            try:
                meta = pkginfo.Wheel(filepath)
            except ValueError:
                # Unable to determine dependencies
                # Assume none
                return

        if meta.requires_dist:
            return meta.requires_dist

    def _get_requires_dist_from_sdist(self,
                                      url
                                      ):  # type: (str) -> Union[list, None]
        """
        Download the sdist and return its dependencies, if any.

        The archive metadata is tried first; failing that, the archive is
        unpacked and a root-level ``*.egg-info/requires.txt`` is parsed.
        """
        filename = os.path.basename(urlparse.urlparse(url).path)

        with temporary_directory() as temp_dir:
            filepath = Path(temp_dir) / filename
            self._download(url, str(filepath))

            try:
                meta = pkginfo.SDist(str(filepath))

                if meta.requires_dist:
                    return meta.requires_dist
            except ValueError:
                # Unable to determine dependencies
                # We pass and go deeper
                pass

            # Still not dependencies found
            # So, we unpack and introspect
            suffix = filepath.suffix
            gz = None
            if suffix == '.zip':
                tar = zipfile.ZipFile(str(filepath))
            else:
                if suffix == '.bz2':
                    gz = BZ2File(str(filepath))
                else:
                    gz = GzipFile(str(filepath))

                tar = tarfile.TarFile(str(filepath), fileobj=gz)

            # NOTE(review): extractall() on a downloaded archive trusts the
            # member paths it contains -- confirm this is acceptable.
            try:
                tar.extractall(os.path.join(temp_dir, 'unpacked'))
            finally:
                if gz:
                    gz.close()

                tar.close()

            unpacked = Path(temp_dir) / 'unpacked'
            sdist_dir = unpacked / self._strip_archive_suffix(
                Path(filename).name
            )

            # Checking for .egg-info
            eggs = list(sdist_dir.glob('*.egg-info'))
            if eggs:
                egg_info = eggs[0]

                requires = egg_info / 'requires.txt'
                if requires.exists():
                    with requires.open() as f:
                        return self._parse_requires(f.read())

        # Still nothing, assume no dependencies
        # We could probably get them by executing
        # python setup.py egg-info but I don't feel
        # comfortable executing a file just for the sake
        # of getting dependencies.
        return

    @staticmethod
    def _strip_archive_suffix(filename):  # type: (str) -> str
        """
        Remove a known archive extension from ``filename``.

        ``str.rstrip('.tar.gz')`` strips a *set of characters*, not a
        suffix, so it mangled names ending in any of ``.targz`` and left
        ``.zip``/``.tar.bz2`` names untouched.
        """
        for ext in ('.tar.gz', '.tar.bz2', '.tgz', '.zip', '.gz', '.bz2'):
            if filename.endswith(ext):
                return filename[:-len(ext)]

        return filename

    def _download(self, url, dest):  # type: (str, str) -> None
        """Stream ``url`` into the file at ``dest`` in 1 KiB chunks."""
        r = get(url, stream=True)
        with open(dest, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

    def _parse_requires(self, requires):  # type: (str) -> Union[list, None]
        """
        Convert the content of a ``requires.txt`` file into PEP 508 strings.

        A ``[extra]``, ``[:marker]`` or ``[extra:marker]`` header sets the
        marker appended to every requirement that follows it. Returns None
        when no requirement is found.
        """
        requires_dist = []
        current_marker = None

        for line in requires.split('\n'):
            line = line.strip()
            if not line:
                continue

            if line.startswith('['):
                # extras or conditional dependencies
                section = line.lstrip('[').rstrip(']')
                # Split on the first colon only: a second colon in the
                # header used to make the two-value unpacking fail.
                extra, _, marker = section.partition(':')

                if extra:
                    if marker:
                        marker = '{} and extra == "{}"'.format(marker, extra)
                    else:
                        marker = 'extra == "{}"'.format(extra)

                if marker:
                    current_marker = marker

                continue

            if current_marker:
                line = '{}; {}'.format(line, current_marker)

            requires_dist.append(line)

        if requires_dist:
            return requires_dist
class PyPiRepository(Repository):
    """
    Repository that reads package and release data from the PyPI JSON API.

    Release information is cached in a file store (``releases``) and is
    refreshed when its ``_cache_version`` differs from ``CACHE_VERSION``.
    When ``fallback`` is enabled and the JSON API reports no
    ``requires_dist``, the distribution archives are downloaded and
    inspected to recover the dependencies.
    """

    CACHE_VERSION = parse_constraint("0.12.0")

    def __init__(self, url="https://pypi.org/", disable_cache=False, fallback=True):
        self._name = "PyPI"
        self._url = url
        self._disable_cache = disable_cache
        self._fallback = fallback

        release_cache_dir = Path(CACHE_DIR) / "cache" / "repositories" / "pypi"
        self._cache = CacheManager(
            {
                "default": "releases",
                "serializer": "json",
                "stores": {
                    "releases": {"driver": "file", "path": str(release_cache_dir)},
                    "packages": {"driver": "dict"},
                },
            }
        )

        # HTTP responses are cached on disk next to the release cache.
        self._session = CacheControl(
            session(), cache=FileCache(str(release_cache_dir / "_http"))
        )

        super(PyPiRepository, self).__init__()

    def find_packages(
        self,
        name,  # type: str
        constraint=None,  # type: Union[VersionConstraint, str, None]
        extras=None,  # type: Union[list, None]
        allow_prereleases=False,  # type: bool
    ):  # type: (...) -> List[Package]
        """
        Find packages on the remote server.

        Pre-releases are skipped unless ``allow_prereleases`` is set or a
        bound of the (range) constraint is itself a pre-release. Releases
        without files and versions that cannot be parsed are skipped.
        """
        if constraint is None:
            constraint = "*"

        if not isinstance(constraint, VersionConstraint):
            constraint = parse_constraint(constraint)

        if isinstance(constraint, VersionRange):
            if (
                constraint.max is not None
                and constraint.max.is_prerelease()
                or constraint.min is not None
                and constraint.min.is_prerelease()
            ):
                allow_prereleases = True

        info = self.get_package_info(name)

        packages = []

        for version, release in info["releases"].items():
            if not release:
                # Bad release
                self._log(
                    "No release information found for {}-{}, skipping".format(
                        name, version
                    ),
                    level="debug",
                )
                continue

            try:
                package = Package(name, version)
            except ParseVersionError:
                self._log(
                    'Unable to parse version "{}" for the {} package, skipping'.format(
                        version, name
                    ),
                    level="debug",
                )
                continue

            if package.is_prerelease() and not allow_prereleases:
                continue

            if not constraint or (constraint and constraint.allows(package.version)):
                if extras is not None:
                    package.requires_extras = extras

                packages.append(package)

        self._log(
            "{} packages found for {} {}".format(len(packages), name, str(constraint)),
            level="debug",
        )

        return packages

    def package(
        self,
        name,  # type: str
        version,  # type: str
        extras=None,  # type: (Union[list, None])
    ):  # type: (...) -> Union[Package, None]
        """
        Build the ``Package`` for ``name``/``version`` from its release
        information and activate the dependencies of the requested extras.
        """
        if extras is None:
            extras = []

        release_info = self.get_release_info(name, version)
        package = Package(name, version, version)
        requires_dist = release_info["requires_dist"] or []
        for req in requires_dist:
            try:
                dependency = dependency_from_pep_508(req)
            except InvalidMarker:
                # Invalid marker
                # We strip the markers hoping for the best
                req = req.split(";")[0]

                dependency = dependency_from_pep_508(req)
            except ValueError:
                # Likely unable to parse constraint so we skip it
                self._log(
                    "Invalid constraint ({}) found in {}-{} dependencies, "
                    "skipping".format(req, package.name, package.version),
                    level="debug",
                )
                continue

            if dependency.in_extras:
                for extra in dependency.in_extras:
                    if extra not in package.extras:
                        package.extras[extra] = []

                    package.extras[extra].append(dependency)

            if not dependency.is_optional():
                package.requires.append(dependency)

        # Adding description
        package.description = release_info.get("summary", "")

        if release_info["requires_python"]:
            package.python_versions = release_info["requires_python"]

        if release_info["platform"]:
            package.platform = release_info["platform"]

        # Adding hashes information
        package.hashes = release_info["digests"]

        # Activate extra dependencies
        for extra in extras:
            if extra in package.extras:
                for dep in package.extras[extra]:
                    dep.activate()

                package.requires += package.extras[extra]

        return package

    def search(self, query, mode=0):
        """
        Search packages by name, and by summary too in full-text mode.

        The XML-RPC endpoint is the fixed ``https://pypi.python.org/pypi``
        URL; ``self._url`` is not used here.
        """
        results = []

        search = {"name": query}

        if mode == self.SEARCH_FULLTEXT:
            search["summary"] = query

        client = ServerProxy("https://pypi.python.org/pypi")
        hits = client.search(search, "or")

        for hit in hits:
            result = Package(hit["name"], hit["version"], hit["version"])
            result.description = to_str(hit["summary"])
            results.append(result)

        return results

    def get_package_info(self, name):  # type: (str) -> dict
        """
        Return the package information given its name.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        if self._disable_cache:
            return self._get_package_info(name)

        return self._cache.store("packages").remember_forever(
            name, lambda: self._get_package_info(name)
        )

    def _get_package_info(self, name):  # type: (str) -> dict
        """Fetch the package JSON document; raise PackageNotFound on a 404."""
        data = self._get("pypi/{}/json".format(name))
        if data is None:
            raise PackageNotFound("Package [{}] not found.".format(name))

        return data

    def get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Return the release information given a package name and a version.

        The information is returned from the cache if it exists
        or retrieved from the remote server. A cached entry whose
        ``_cache_version`` differs from ``CACHE_VERSION`` is fetched again
        and stored.
        """
        if self._disable_cache:
            return self._get_release_info(name, version)

        cached = self._cache.remember_forever(
            "{}:{}".format(name, version), lambda: self._get_release_info(name, version)
        )

        cache_version = cached.get("_cache_version", "0.0.0")
        if parse_constraint(cache_version) != self.CACHE_VERSION:
            # The cache must be updated
            self._log(
                "The cache for {} {} is outdated. Refreshing.".format(name, version),
                level="debug",
            )
            cached = self._get_release_info(name, version)

            self._cache.forever("{}:{}".format(name, version), cached)

        return cached

    def _get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Build the release information dict from the release JSON document.

        When fallback is enabled and the document carries no
        ``requires_dist``, the listed sdists and wheels are inspected to
        fill ``requires_dist`` (and ``requires_python`` when missing).
        """
        self._log("Getting info for {} ({}) from PyPI".format(name, version), "debug")

        json_data = self._get("pypi/{}/{}/json".format(name, version))
        if json_data is None:
            raise PackageNotFound("Package [{}] not found.".format(name))

        info = json_data["info"]
        data = {
            "name": info["name"],
            "version": info["version"],
            "summary": info["summary"],
            "platform": info["platform"],
            "requires_dist": info["requires_dist"],
            "requires_python": info["requires_python"],
            "digests": [],
            "_cache_version": str(self.CACHE_VERSION),
        }

        try:
            version_info = json_data["releases"][version]
        except KeyError:
            version_info = []

        for file_info in version_info:
            data["digests"].append(file_info["digests"]["sha256"])

        if self._fallback and data["requires_dist"] is None:
            self._log("No dependencies found, downloading archives", level="debug")
            # No dependencies set (along with other information)
            # This might be due to actually no dependencies
            # or badly set metadata when uploading
            # So, we need to make sure there is actually no
            # dependencies by introspecting packages
            urls = defaultdict(list)
            for url in json_data["urls"]:
                # Only get sdist and wheels if they exist
                dist_type = url["packagetype"]
                if dist_type not in ["sdist", "bdist_wheel"]:
                    continue

                urls[dist_type].append(url["url"])

            if not urls:
                return data

            info = self._get_info_from_urls(urls)

            data["requires_dist"] = info["requires_dist"]
            if not data["requires_python"]:
                data["requires_python"] = info["requires_python"]

        return data

    def _get(self, endpoint):  # type: (str) -> Union[dict, None]
        """GET ``self._url + endpoint``; return decoded JSON or None on 404."""
        json_response = self._session.get(self._url + endpoint)
        if json_response.status_code == 404:
            return None

        json_data = json_response.json()

        return json_data

    def _get_info_from_urls(
        self, urls
    ):  # type: (Dict[str, List[str]]) -> Dict[str, Union[str, List, None]]
        """
        Pick the most informative archive among ``urls`` and inspect it.

        Order of preference: a ``py2.py3`` universal wheel; the merge of a
        ``py2`` and a ``py3`` universal wheel; a single universal wheel; the
        first platform-specific wheel when there is no sdist; the first
        sdist. When none of these is available, an info dict with no
        dependencies is returned.
        """
        # Checking wheels first as they are more likely to hold
        # the necessary information
        if "bdist_wheel" in urls:
            # Check for a universal wheel
            wheels = urls["bdist_wheel"]

            universal_wheel = None
            universal_python2_wheel = None
            universal_python3_wheel = None
            platform_specific_wheels = []
            for wheel in wheels:
                link = Link(wheel)
                m = wheel_file_re.match(link.filename)
                if not m:
                    continue

                pyver = m.group("pyver")
                abi = m.group("abi")
                plat = m.group("plat")
                if abi == "none" and plat == "any":
                    # Universal wheel
                    if pyver == "py2.py3":
                        # Any Python
                        universal_wheel = wheel
                    elif pyver == "py2":
                        universal_python2_wheel = wheel
                    else:
                        universal_python3_wheel = wheel
                else:
                    platform_specific_wheels.append(wheel)

            if universal_wheel is not None:
                return self._get_info_from_wheel(universal_wheel)

            info = {}
            if universal_python2_wheel and universal_python3_wheel:
                info = self._get_info_from_wheel(universal_python2_wheel)

                py3_info = self._get_info_from_wheel(universal_python3_wheel)
                if py3_info["requires_dist"]:
                    if not info["requires_dist"]:
                        info["requires_dist"] = py3_info["requires_dist"]

                        return info

                    py2_requires_dist = set(
                        dependency_from_pep_508(r).to_pep_508()
                        for r in info["requires_dist"]
                    )
                    py3_requires_dist = set(
                        dependency_from_pep_508(r).to_pep_508()
                        for r in py3_info["requires_dist"]
                    )
                    base_requires_dist = py2_requires_dist & py3_requires_dist
                    py2_only_requires_dist = py2_requires_dist - py3_requires_dist
                    py3_only_requires_dist = py3_requires_dist - py2_requires_dist

                    # Normalizing requires_dist: requirements found in only
                    # one of the two wheels get a matching Python marker.
                    requires_dist = list(base_requires_dist)
                    for requirement in py2_only_requires_dist:
                        dep = dependency_from_pep_508(requirement)
                        dep.marker = dep.marker.intersect(
                            parse_marker("python_version == '2.7'")
                        )
                        requires_dist.append(dep.to_pep_508())

                    for requirement in py3_only_requires_dist:
                        dep = dependency_from_pep_508(requirement)
                        dep.marker = dep.marker.intersect(
                            parse_marker("python_version >= '3'")
                        )
                        requires_dist.append(dep.to_pep_508())

                    info["requires_dist"] = sorted(set(requires_dist))

            if info:
                return info

            # Prefer non platform specific wheels
            if universal_python3_wheel:
                return self._get_info_from_wheel(universal_python3_wheel)

            if universal_python2_wheel:
                return self._get_info_from_wheel(universal_python2_wheel)

            if platform_specific_wheels and "sdist" not in urls:
                # Pick the first wheel available and hope for the best
                return self._get_info_from_wheel(platform_specific_wheels[0])

        sdists = urls.get("sdist")
        if not sdists:
            # No usable wheel and no sdist: indexing the (empty) sdist list
            # used to raise IndexError. Report "nothing found" instead.
            return {"summary": "", "requires_python": None, "requires_dist": None}

        return self._get_info_from_sdist(sdists[0])

    def _get_info_from_wheel(
        self, url
    ):  # type: (str) -> Dict[str, Union[str, List, None]]
        """Download the wheel at ``url`` and read its metadata."""
        self._log(
            "Downloading wheel: {}".format(urlparse.urlparse(url).path.rsplit("/")[-1]),
            level="debug",
        )
        info = {"summary": "", "requires_python": None, "requires_dist": None}

        filename = os.path.basename(urlparse.urlparse(url).path.rsplit("/")[-1])

        with temporary_directory() as temp_dir:
            filepath = os.path.join(temp_dir, filename)
            self._download(url, filepath)

            try:
                meta = pkginfo.Wheel(filepath)
            except ValueError:
                # Unable to determine dependencies
                # Assume none
                return info

        if meta.summary:
            info["summary"] = meta.summary

        info["requires_python"] = meta.requires_python

        if meta.requires_dist:
            info["requires_dist"] = meta.requires_dist

        return info

    def _get_info_from_sdist(
        self, url
    ):  # type: (str) -> Dict[str, Union[str, List, None]]
        """
        Download the sdist at ``url`` and read its metadata.

        The archive metadata is tried first. If that fails, the archive is
        unpacked and ``requires.txt`` is looked up in an ``.egg-info``
        directory (root first, then sub directories). As a last resort the
        setup files are read, without being executed by this method.
        """
        self._log(
            "Downloading sdist: {}".format(urlparse.urlparse(url).path.rsplit("/")[-1]),
            level="debug",
        )
        info = {"summary": "", "requires_python": None, "requires_dist": None}

        filename = os.path.basename(urlparse.urlparse(url).path)

        with temporary_directory() as temp_dir:
            filepath = Path(temp_dir) / filename
            self._download(url, str(filepath))

            try:
                meta = pkginfo.SDist(str(filepath))
                if meta.summary:
                    info["summary"] = meta.summary

                if meta.requires_python:
                    info["requires_python"] = meta.requires_python

                if meta.requires_dist:
                    info["requires_dist"] = list(meta.requires_dist)

                    return info
            except ValueError:
                # Unable to determine dependencies
                # We pass and go deeper
                pass

            # Still not dependencies found
            # So, we unpack and introspect
            suffix = filepath.suffix
            gz = None
            if suffix == ".zip":
                tar = zipfile.ZipFile(str(filepath))
            else:
                if suffix == ".bz2":
                    gz = BZ2File(str(filepath))
                else:
                    gz = GzipFile(str(filepath))

                tar = tarfile.TarFile(str(filepath), fileobj=gz)

            # NOTE(review): extractall() on a downloaded archive trusts the
            # member paths it contains -- confirm this is acceptable.
            try:
                tar.extractall(os.path.join(temp_dir, "unpacked"))
            finally:
                if gz:
                    gz.close()

                tar.close()

            unpacked = Path(temp_dir) / "unpacked"
            sdist_dir = unpacked / self._strip_archive_suffix(Path(filename).name)

            # Checking for .egg-info at root
            eggs = list(sdist_dir.glob("*.egg-info"))
            if eggs:
                egg_info = eggs[0]

                requires = egg_info / "requires.txt"
                if requires.exists():
                    with requires.open() as f:
                        info["requires_dist"] = parse_requires(f.read())

                    return info

            # Searching for .egg-info in sub directories
            eggs = list(sdist_dir.glob("**/*.egg-info"))
            if eggs:
                egg_info = eggs[0]

                requires = egg_info / "requires.txt"
                if requires.exists():
                    with requires.open() as f:
                        info["requires_dist"] = parse_requires(f.read())

                    return info

            # Still nothing, try reading (without executing it)
            # the setup.py file.
            try:
                setup_info = self._inspect_sdist_with_setup(sdist_dir)

                # Only fill the fields that are still empty. Iterating over
                # setup_info (not info) matters: info also has a "summary"
                # key that setup_info lacks, and the resulting KeyError was
                # swallowed below, discarding everything read from setup.py.
                for key, value in setup_info.items():
                    if not info.get(key):
                        info[key] = value

                return info
            except Exception as e:
                self._log(
                    "An error occurred when reading setup.py or setup.cfg: {}".format(
                        str(e)
                    ),
                    "warning",
                )
                return info

    @staticmethod
    def _strip_archive_suffix(filename):  # type: (str) -> str
        """
        Remove a known archive extension from ``filename``.

        ``str.rstrip(suffix)`` strips a *set of characters*, not a suffix,
        so it could also eat the end of the version (e.g. a trailing ``2``
        before ``.bz2`` or a trailing ``a`` before ``.tar.gz``).
        """
        for ext in (".tar.gz", ".tar.bz2", ".tgz", ".zip", ".gz", ".bz2"):
            if filename.endswith(ext):
                return filename[: -len(ext)]

        return filename

    def _inspect_sdist_with_setup(self, sdist_dir):
        """
        Read dependencies and the Python requirement from the setup files
        of an unpacked sdist through ``SetupReader``.

        The install requirements and extras are serialized in the
        ``requires.txt`` layout so that ``parse_requires`` can convert them.
        """
        info = {"requires_python": None, "requires_dist": None}

        result = SetupReader.read_from_directory(sdist_dir)
        lines = [dep + "\n" for dep in result["install_requires"]]

        if result["extras_require"]:
            lines.append("\n")

        for extra_name, deps in result["extras_require"].items():
            lines.append("[{}]\n".format(extra_name))
            lines.extend(dep + "\n" for dep in deps)
            lines.append("\n")

        info["requires_dist"] = parse_requires("".join(lines))
        info["requires_python"] = result["python_requires"]

        return info

    def _download(self, url, dest):  # type: (str, str) -> None
        """Stream ``url`` into ``dest``; raise on an HTTP error status."""
        r = get(url, stream=True)
        r.raise_for_status()

        with open(dest, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

    def _log(self, msg, level="info"):
        """Log ``msg`` at ``level``, prefixed with the repository name."""
        getattr(logger, level)("<comment>{}:</comment> {}".format(self._name, msg))
class PyPiRepository(Repository):
    """
    Repository that reads package and release data from the PyPI JSON API.

    Release information is cached in a file store (``releases``) and package
    information in an in-memory store (``packages``). When ``fallback`` is
    enabled and the JSON API reports no ``requires_dist``, a distribution
    archive is downloaded and inspected to recover the dependencies.
    """

    def __init__(self, url="https://pypi.org/", disable_cache=False, fallback=True):
        self._name = "PyPI"
        self._url = url
        self._disable_cache = disable_cache
        self._fallback = fallback

        release_cache_dir = Path(CACHE_DIR) / "cache" / "repositories" / "pypi"
        self._cache = CacheManager(
            {
                "default": "releases",
                "serializer": "json",
                "stores": {
                    "releases": {"driver": "file", "path": str(release_cache_dir)},
                    "packages": {"driver": "dict"},
                },
            }
        )

        # HTTP responses are cached on disk next to the release cache.
        self._session = CacheControl(
            session(), cache=FileCache(str(release_cache_dir / "_http"))
        )

        super(PyPiRepository, self).__init__()

    def find_packages(
        self,
        name,  # type: str
        constraint=None,  # type: Union[VersionConstraint, str, None]
        extras=None,  # type: Union[list, None]
        allow_prereleases=False,  # type: bool
    ):  # type: (...) -> List[Package]
        """
        Find packages on the remote server.

        A pre-release is skipped only when pre-releases are not allowed and
        the constraint does not allow that version either.
        """
        if constraint is None:
            constraint = "*"

        if not isinstance(constraint, VersionConstraint):
            constraint = parse_constraint(constraint)

        info = self.get_package_info(name)

        packages = []

        for version, release in info["releases"].items():
            if not release:
                # Bad release
                self._log(
                    "No release information found for {}-{}, skipping".format(
                        name, version
                    ),
                    level="debug",
                )
                continue

            package = Package(name, version)

            if (
                package.is_prerelease()
                and not allow_prereleases
                and not constraint.allows(package.version)
            ):
                continue

            if not constraint or (constraint and constraint.allows(package.version)):
                if extras is not None:
                    package.requires_extras = extras

                packages.append(package)

        self._log(
            "{} packages found for {} {}".format(len(packages), name, str(constraint)),
            level="debug",
        )

        return packages

    def package(
        self,
        name,  # type: str
        version,  # type: str
        extras=None,  # type: (Union[list, None])
    ):  # type: (...) -> Union[Package, None]
        """
        Return the ``Package`` for ``name``/``version``.

        A package already present in ``self._packages`` is returned as is;
        otherwise it is built from the release information, its requested
        ``extras`` are activated, and it is appended to ``self._packages``.
        """
        try:
            index = self._packages.index(Package(name, version, version))

            return self._packages[index]
        except ValueError:
            if extras is None:
                extras = []

            release_info = self.get_release_info(name, version)
            package = Package(name, version, version)
            requires_dist = release_info["requires_dist"] or []
            for req in requires_dist:
                try:
                    dependency = dependency_from_pep_508(req)
                except InvalidMarker:
                    # Invalid marker
                    # We strip the markers hoping for the best
                    req = req.split(";")[0]

                    dependency = dependency_from_pep_508(req)
                except ValueError:
                    # Likely unable to parse constraint so we skip it
                    self._log(
                        "Invalid constraint ({}) found in {}-{} dependencies, "
                        "skipping".format(req, package.name, package.version),
                        level="debug",
                    )
                    continue

                if dependency.extras:
                    for extra in dependency.extras:
                        if extra not in package.extras:
                            package.extras[extra] = []

                        package.extras[extra].append(dependency)

                if not dependency.is_optional():
                    package.requires.append(dependency)

            # Adding description
            package.description = release_info.get("summary", "")

            if release_info["requires_python"]:
                package.python_versions = release_info["requires_python"]

            if release_info["platform"]:
                package.platform = release_info["platform"]

            # Adding hashes information
            package.hashes = release_info["digests"]

            # Activate extra dependencies
            for extra in extras:
                if extra in package.extras:
                    for dep in package.extras[extra]:
                        dep.activate()

                    package.requires += package.extras[extra]

            self._packages.append(package)

            return package

    def search(self, query, mode=0):
        """
        Search packages by name, and by summary too in full-text mode.

        The XML-RPC endpoint is the fixed ``https://pypi.python.org/pypi``
        URL; ``self._url`` is not used here.
        """
        results = []

        search = {"name": query}

        if mode == self.SEARCH_FULLTEXT:
            search["summary"] = query

        client = ServerProxy("https://pypi.python.org/pypi")
        hits = client.search(search, "or")

        for hit in hits:
            result = Package(hit["name"], hit["version"], hit["version"])
            result.description = to_str(hit["summary"])
            results.append(result)

        return results

    def get_package_info(self, name):  # type: (str) -> dict
        """
        Return the package information given its name.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        if self._disable_cache:
            return self._get_package_info(name)

        return self._cache.store("packages").remember_forever(
            name, lambda: self._get_package_info(name)
        )

    def _get_package_info(self, name):  # type: (str) -> dict
        """Fetch the package JSON document; raise ValueError on a 404."""
        data = self._get("pypi/{}/json".format(name))
        if data is None:
            raise ValueError("Package [{}] not found.".format(name))

        return data

    def get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Return the release information given a package name and a version.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        if self._disable_cache:
            return self._get_release_info(name, version)

        return self._cache.remember_forever(
            "{}:{}".format(name, version), lambda: self._get_release_info(name, version)
        )

    def _get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Build the release information dict from the release JSON document.

        When fallback is enabled and the document carries no
        ``requires_dist``, the dependencies are read from a universal wheel
        if one is listed, otherwise from the first sdist.
        """
        self._log("Getting info for {} ({}) from PyPI".format(name, version), "debug")

        json_data = self._get("pypi/{}/{}/json".format(name, version))
        if json_data is None:
            raise ValueError("Package [{}] not found.".format(name))

        info = json_data["info"]
        data = {
            "name": info["name"],
            "version": info["version"],
            "summary": info["summary"],
            "platform": info["platform"],
            "requires_dist": info["requires_dist"],
            "requires_python": info["requires_python"],
            "digests": [],
            "_fallback": False,
        }

        try:
            version_info = json_data["releases"][version]
        except KeyError:
            version_info = []

        for file_info in version_info:
            data["digests"].append(file_info["digests"]["sha256"])

        if self._fallback and data["requires_dist"] is None:
            self._log("No dependencies found, downloading archives", level="debug")
            # No dependencies set (along with other information)
            # This might be due to actually no dependencies
            # or badly set metadata when uploading
            # So, we need to make sure there is actually no
            # dependencies by introspecting packages
            urls = {}
            for url in json_data["urls"]:
                # Only get sdist and universal wheels
                dist_type = url["packagetype"]
                if dist_type not in ["sdist", "bdist_wheel"]:
                    continue

                if dist_type == "sdist":
                    # Keep the first sdist only (the guard used to test the
                    # key "dist", which never exists).
                    if "sdist" not in urls:
                        urls["sdist"] = url["url"]

                    continue

                if "bdist_wheel" in urls:
                    continue

                # If bdist_wheel, check if it's universal
                python_version = url["python_version"]
                if python_version not in ["py2.py3", "py3", "py2"]:
                    continue

                parts = urlparse.urlparse(url["url"])
                filename = os.path.basename(parts.path)

                if "-none-any" not in filename:
                    continue

                # Record the universal wheel; previously it was detected
                # but never stored, so wheels were never inspected.
                urls["bdist_wheel"] = url["url"]

            if not urls:
                return data

            info = self._get_info_from_urls(urls)

            data["requires_dist"] = info["requires_dist"]
            if not data["requires_python"]:
                data["requires_python"] = info["requires_python"]

        return data

    def _get(self, endpoint):  # type: (str) -> Union[dict, None]
        """GET ``self._url + endpoint``; return decoded JSON or None on 404."""
        json_response = self._session.get(self._url + endpoint)
        if json_response.status_code == 404:
            return None

        json_data = json_response.json()

        return json_data

    def _get_info_from_urls(
        self, urls
    ):  # type: (Dict[str, str]) -> Dict[str, Union[str, List, None]]
        """Inspect the wheel when one is available, else the sdist."""
        if "bdist_wheel" in urls:
            return self._get_info_from_wheel(urls["bdist_wheel"])

        return self._get_info_from_sdist(urls["sdist"])

    def _get_info_from_wheel(
        self, url
    ):  # type: (str) -> Dict[str, Union[str, List, None]]
        """Download the wheel at ``url`` and read its metadata."""
        info = {"summary": "", "requires_python": None, "requires_dist": None}

        filename = os.path.basename(urlparse.urlparse(url).path)

        with temporary_directory() as temp_dir:
            filepath = os.path.join(temp_dir, filename)
            self._download(url, filepath)

            try:
                meta = pkginfo.Wheel(filepath)
            except ValueError:
                # Unable to determine dependencies
                # Assume none
                return info

        if meta.summary:
            info["summary"] = meta.summary

        info["requires_python"] = meta.requires_python

        if meta.requires_dist:
            info["requires_dist"] = meta.requires_dist

        return info

    def _get_info_from_sdist(
        self, url
    ):  # type: (str) -> Dict[str, Union[str, List, None]]
        """
        Download the sdist at ``url`` and read its metadata.

        The archive metadata is tried first. If that fails, the archive is
        unpacked and ``requires.txt`` is looked up in an ``.egg-info``
        directory (root first, then sub directories).
        """
        info = {"summary": "", "requires_python": None, "requires_dist": None}

        filename = os.path.basename(urlparse.urlparse(url).path)

        with temporary_directory() as temp_dir:
            filepath = Path(temp_dir) / filename
            self._download(url, str(filepath))

            try:
                meta = pkginfo.SDist(str(filepath))
                if meta.summary:
                    info["summary"] = meta.summary

                if meta.requires_python:
                    info["requires_python"] = meta.requires_python

                if meta.requires_dist:
                    info["requires_dist"] = list(meta.requires_dist)

                    return info
            except ValueError:
                # Unable to determine dependencies
                # We pass and go deeper
                pass

            # Still not dependencies found
            # So, we unpack and introspect
            suffix = filepath.suffix
            gz = None
            if suffix == ".zip":
                tar = zipfile.ZipFile(str(filepath))
            else:
                if suffix == ".bz2":
                    gz = BZ2File(str(filepath))
                else:
                    gz = GzipFile(str(filepath))

                tar = tarfile.TarFile(str(filepath), fileobj=gz)

            # NOTE(review): extractall() on a downloaded archive trusts the
            # member paths it contains -- confirm this is acceptable.
            try:
                tar.extractall(os.path.join(temp_dir, "unpacked"))
            finally:
                if gz:
                    gz.close()

                tar.close()

            unpacked = Path(temp_dir) / "unpacked"
            sdist_dir = unpacked / self._strip_archive_suffix(Path(filename).name)

            # Checking for .egg-info at root
            eggs = list(sdist_dir.glob("*.egg-info"))
            if eggs:
                egg_info = eggs[0]

                requires = egg_info / "requires.txt"
                if requires.exists():
                    with requires.open() as f:
                        info["requires_dist"] = parse_requires(f.read())

                    return info

            # Searching for .egg-info in sub directories
            eggs = list(sdist_dir.glob("**/*.egg-info"))
            if eggs:
                egg_info = eggs[0]

                requires = egg_info / "requires.txt"
                if requires.exists():
                    with requires.open() as f:
                        info["requires_dist"] = parse_requires(f.read())

                    return info

        # Still nothing, assume no dependencies
        # We could probably get them by executing
        # python setup.py egg-info but I don't feel
        # comfortable executing a file just for the sake
        # of getting dependencies.
        return info

    @staticmethod
    def _strip_archive_suffix(filename):  # type: (str) -> str
        """
        Remove a known archive extension from ``filename``.

        ``str.rstrip(".tar.gz")`` strips a *set of characters*, not a
        suffix, so it mangled names ending in any of ``.targz`` and left
        ``.zip``/``.tar.bz2`` names untouched.
        """
        for ext in (".tar.gz", ".tar.bz2", ".tgz", ".zip", ".gz", ".bz2"):
            if filename.endswith(ext):
                return filename[: -len(ext)]

        return filename

    def _inspect_sdist_with_setup(self, sdist_dir):
        """
        Run ``python setup.py egg_info`` for an unpacked sdist inside a
        ``Venv`` and read the generated metadata.

        This is best effort: any failure leaves the corresponding fields
        set to None. The working directory is always restored.
        """
        info = {"requires_python": None, "requires_dist": None}

        setup = sdist_dir / "setup.py"
        if not setup.exists():
            return info

        venv = Venv.create(NullIO())

        current_dir = os.getcwd()
        os.chdir(sdist_dir.as_posix())

        try:
            venv.run("python", "setup.py", "egg_info")

            egg_info = list(sdist_dir.glob("**/*.egg-info"))[0]

            meta = pkginfo.UnpackedSDist(str(egg_info))
            if meta.requires_python:
                info["requires_python"] = meta.requires_python

            if meta.requires_dist:
                info["requires_dist"] = list(meta.requires_dist)
            else:
                requires = egg_info / "requires.txt"
                if requires.exists():
                    with requires.open() as f:
                        info["requires_dist"] = parse_requires(f.read())
        except Exception:
            # Deliberately best effort: a failing setup.py must not abort
            # the resolution, the caller just gets no extra information.
            pass
        finally:
            # Restore the working directory even if a non-Exception error
            # (e.g. KeyboardInterrupt) interrupts the inspection.
            os.chdir(current_dir)

        return info

    def _download(self, url, dest):  # type: (str, str) -> None
        """Stream ``url`` into the file at ``dest`` in 1 KiB chunks."""
        r = get(url, stream=True)
        with open(dest, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

    def _log(self, msg, level="info"):
        """Log ``msg`` at ``level``, prefixed with the repository name."""
        getattr(logger, level)("{}: {}".format(self._name, msg))
class LegacyRepository(PyPiRepository):
    """
    Repository that reads package pages from a custom index URL.

    Pages are fetched from ``<url>/<package>/`` and wrapped in ``Page``.
    Matching versions are kept in the in-memory "matches" cache store and
    requests go through the session provided by ``Authenticator``.
    """

    def __init__(
        self, name, url, config=None, disable_cache=False, cert=None, client_cert=None
    ):  # type: (str, str, Optional[Config], bool, Optional[Path], Optional[Path]) -> None
        """
        :param name: Repository name; ``"pypi"`` is rejected.
        :param url: Base URL of the index (a trailing slash is removed).
        :param config: Configuration handed to ``Authenticator``; a fresh
            ``Config(use_environment=True)`` is used when omitted.
        :param disable_cache: Stored as ``_disable_cache``.
        :param cert: Optional path used as ``session.verify``.
        :param client_cert: Optional path used as ``session.cert``.
        :raises ValueError: If ``name`` is ``"pypi"``.
        """
        if name == "pypi":
            raise ValueError("The name [pypi] is reserved for repositories")

        self._packages = []
        self._name = name
        self._url = url.rstrip("/")
        self._client_cert = client_cert
        self._cert = cert
        self._cache_dir = REPOSITORY_CACHE_DIR / name
        self._cache = CacheManager(
            {
                "default": "releases",
                "serializer": "json",
                "stores": {
                    "releases": {"driver": "file", "path": str(self._cache_dir)},
                    "packages": {"driver": "dict"},
                    "matches": {"driver": "dict"},
                },
            }
        )

        self._authenticator = Authenticator(
            config=config or Config(use_environment=True)
        )

        # Basic auth is only configured when both parts of the credentials exist.
        self._basic_auth = None
        username, password = self._authenticator.get_credentials_for_url(self._url)
        if username is not None and password is not None:
            self._basic_auth = requests.auth.HTTPBasicAuth(username, password)

        self._disable_cache = disable_cache

    @property
    def cert(self):  # type: () -> Optional[Path]
        """Path passed as ``cert`` at construction time, if any."""
        return self._cert

    @property
    def client_cert(self):  # type: () -> Optional[Path]
        """Path passed as ``client_cert`` at construction time, if any."""
        return self._client_cert

    @property
    def session(self):
        """
        Return the authenticator's session wrapped in ``CacheControl``.

        Basic auth and certificate settings are (re)applied on every access.
        """
        session = self._authenticator.session

        if self._basic_auth:
            session.auth = self._basic_auth

        if self._cert:
            session.verify = str(self._cert)

        if self._client_cert:
            session.cert = str(self._client_cert)

        return CacheControl(session, cache=FileCache(str(self._cache_dir / "_http")))

    @property
    def authenticated_url(self):  # type: () -> str
        """Return the repository URL with URL-quoted credentials embedded."""
        if not self._basic_auth:
            return self.url

        parsed = urlparse.urlparse(self.url)

        return "{scheme}://{username}:{password}@{netloc}{path}".format(
            scheme=parsed.scheme,
            username=quote(self._basic_auth.username, safe=""),
            password=quote(self._basic_auth.password, safe=""),
            netloc=parsed.netloc,
            path=parsed.path,
        )

    def find_packages(self, dependency):
        """
        Return the packages of this repository that satisfy ``dependency``.

        Pre-releases are skipped unless the dependency allows them or one of
        the constraint bounds is itself a pre-release. When the constraint
        accepts any version and nothing else matched, the skipped pre-releases
        are returned instead.
        """
        packages = []

        constraint = dependency.constraint
        if constraint is None:
            constraint = "*"

        if not isinstance(constraint, VersionConstraint):
            constraint = parse_constraint(constraint)

        allow_prereleases = dependency.allows_prereleases()
        if isinstance(constraint, VersionRange):
            if (
                constraint.max is not None
                and constraint.max.is_prerelease()
                or constraint.min is not None
                and constraint.min.is_prerelease()
            ):
                allow_prereleases = True

        key = dependency.name
        if not constraint.is_any():
            key = "{}:{}".format(key, str(constraint))

        ignored_pre_release_versions = []

        if self._cache.store("matches").has(key):
            versions = self._cache.store("matches").get(key)
        else:
            page = self._get("/{}/".format(dependency.name.replace(".", "-")))
            if page is None:
                return []

            versions = []
            for version in page.versions:
                if version.is_prerelease() and not allow_prereleases:
                    if constraint.is_any():
                        # we need this when all versions of the package are pre-releases
                        ignored_pre_release_versions.append(version)
                    continue

                if constraint.allows(version):
                    versions.append(version)

            self._cache.store("matches").put(key, versions, 5)

        for package_versions in (versions, ignored_pre_release_versions):
            for version in package_versions:
                package = Package(
                    dependency.name,
                    version,
                    source_type="legacy",
                    source_reference=self.name,
                    source_url=self._url,
                )

                packages.append(package)

            self._log(
                "{} packages found for {} {}".format(
                    len(packages), dependency.name, str(constraint)
                ),
                level="debug",
            )

            if packages or not constraint.is_any():
                # we have matching packages, or constraint is not (*)
                break

        return packages

    def package(self, name, version, extras=None):  # type: (...) -> Package
        """
        Retrieve the release information.

        This is a heavy task which takes time.
        We have to download a package to get the dependencies.
        We also need to download every file matching this release
        to get the various hashes.

        Note that this will be cached so the subsequent operations
        should be much faster.
        """
        try:
            index = self._packages.index(Package(name, version, version))

            return self._packages[index]
        except ValueError:
            package = super(LegacyRepository, self).package(name, version, extras)
            package._source_type = "legacy"
            package._source_url = self._url
            package._source_reference = self.name

            return package

    def find_links_for_package(self, package):
        """Return the links the package page lists for ``package.version``."""
        page = self._get("/{}/".format(package.name.replace(".", "-")))
        if page is None:
            return []

        return list(page.links_for_version(package.version))

    def _get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Build the release information dictionary for ``name`` / ``version``.

        :raises PackageNotFound: If the package page is missing or lists no
            link for the requested version.
        """
        page = self._get("/{}/".format(canonicalize_name(name).replace(".", "-")))
        if page is None:
            raise PackageNotFound('No package named "{}"'.format(name))

        data = PackageInfo(
            name=name,
            version=version,
            summary="",
            platform=None,
            requires_dist=[],
            requires_python=None,
            files=[],
            cache_version=str(self.CACHE_VERSION),
        )

        links = list(page.links_for_version(Version.parse(version)))
        if not links:
            raise PackageNotFound(
                'No valid distribution links found for package: "{}" version: "{}"'.format(
                    name, version
                )
            )

        urls = defaultdict(list)
        files = []
        for link in links:
            if link.is_wheel:
                urls["bdist_wheel"].append(link.url)
            elif link.filename.endswith(
                (".tar.gz", ".zip", ".bz2", ".xz", ".Z", ".tar")
            ):
                urls["sdist"].append(link.url)

            # Hashes are stored as "<algorithm>:<digest>"; falsy hashes are kept as-is.
            h = link.hash
            if h:
                h = link.hash_name + ":" + link.hash

            files.append({"file": link.filename, "hash": h})

        data.files = files

        info = self._get_info_from_urls(urls)

        data.summary = info.summary
        data.requires_dist = info.requires_dist
        data.requires_python = info.requires_python

        return data.asdict()

    def _get(self, endpoint):  # type: (str) -> Union[Page, None]
        """
        Fetch ``endpoint`` relative to the repository URL.

        :return: A ``Page`` for the response, or ``None`` when the server
            answers 404 or rejects the credentials (401/403).
        :raises RepositoryError: For any other HTTP error status.
        """
        url = self._url + endpoint
        try:
            response = self.session.get(url)
            # Must be checked before raise_for_status(): that call raises for
            # every 4xx status, which used to make this branch unreachable.
            if response.status_code in (401, 403):
                self._log(
                    "Authorization error accessing {url}".format(url=url),
                    level="warning",
                )
                return
            if response.status_code == 404:
                return
            response.raise_for_status()
        except requests.HTTPError as e:
            raise RepositoryError(e)

        return Page(url, response.content, response.headers)
class LegacyRepository(PyPiRepository):
    """
    Repository that reads package pages from a custom index URL through a
    ``CacheControl``-wrapped ``requests`` session.
    """

    def __init__(self, name, url, disable_cache=False):
        """
        :param name: Repository name; ``"pypi"`` is rejected.
        :param url: Base URL of the index (a trailing slash is removed).
        :param disable_cache: Stored as ``_disable_cache``.
        :raises ValueError: If ``name`` is ``"pypi"``.
        """
        if name == "pypi":
            raise ValueError("The name [pypi] is reserved for repositories")

        self._packages = []
        self._name = name
        self._url = url.rstrip("/")
        self._cache_dir = Path(CACHE_DIR) / "cache" / "repositories" / name

        self._cache = CacheManager(
            {
                "default": "releases",
                "serializer": "json",
                "stores": {
                    "releases": {"driver": "file", "path": str(self._cache_dir)},
                    "packages": {"driver": "dict"},
                    "matches": {"driver": "dict"},
                },
            }
        )

        self._session = CacheControl(
            requests.session(), cache=FileCache(str(self._cache_dir / "_http"))
        )

        # Only attach stored credentials when the URL does not carry a username.
        url_parts = urlparse.urlparse(self._url)
        if not url_parts.username:
            self._session.auth = get_http_basic_auth(self.name)

        self._disable_cache = disable_cache

    @property
    def name(self):
        """Name given to this repository at construction time."""
        return self._name

    def find_packages(
        self, name, constraint=None, extras=None, allow_prereleases=False
    ):
        """
        Return a ``Package`` for every version on the package page that the
        constraint allows (every version when no constraint is given).

        ``allow_prereleases`` is accepted but not consulted by this method.
        """
        packages = []

        if constraint is not None and not isinstance(constraint, VersionConstraint):
            constraint = parse_constraint(constraint)

        key = name
        if constraint:
            key = "{}:{}".format(key, str(constraint))

        if self._cache.store("matches").has(key):
            versions = self._cache.store("matches").get(key)
        else:
            page = self._get("/{}/".format(canonicalize_name(name).replace(".", "-")))
            if page is None:
                return []

            versions = [
                version
                for version in page.versions
                if not constraint or constraint.allows(version)
            ]

            self._cache.store("matches").put(key, versions, 5)

        for version in versions:
            package = Package(name, version)
            package.source_type = "legacy"
            package.source_url = self._url

            if extras is not None:
                package.requires_extras = extras

            packages.append(package)

        self._log(
            "{} packages found for {} {}".format(len(packages), name, str(constraint)),
            level="debug",
        )

        return packages

    def package(
        self, name, version, extras=None
    ):  # type: (...) -> poetry.packages.Package
        """
        Retrieve the release information.

        This is a heavy task which takes time.
        We have to download a package to get the dependencies.
        We also need to download every file matching this release
        to get the various hashes.

        Note that, this will be cached so the subsequent operations
        should be much faster.
        """
        try:
            index = self._packages.index(
                poetry.packages.Package(name, version, version)
            )

            return self._packages[index]
        except ValueError:
            if extras is None:
                extras = []

            release_info = self.get_release_info(name, version)
            package = poetry.packages.Package(name, version, version)
            package.source_type = "legacy"
            package.source_url = self._url
            package.source_reference = self.name

            requires_dist = release_info["requires_dist"] or []
            for req in requires_dist:
                try:
                    dependency = dependency_from_pep_508(req)
                except InvalidMarker:
                    # Invalid marker
                    # We strip the markers hoping for the best
                    req = req.split(";")[0]

                    dependency = dependency_from_pep_508(req)

                if dependency.extras:
                    for extra in dependency.extras:
                        if extra not in package.extras:
                            package.extras[extra] = []

                        package.extras[extra].append(dependency)

                if not dependency.is_optional():
                    package.requires.append(dependency)

            # Adding description
            package.description = release_info.get("summary", "")

            # Adding hashes information
            package.hashes = release_info["digests"]

            # Activate extra dependencies
            for extra in extras:
                if extra in package.extras:
                    for dep in package.extras[extra]:
                        dep.activate()

                    package.requires += package.extras[extra]

            self._packages.append(package)

            return package

    def _get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Build the release information dictionary for ``name`` / ``version``.

        :raises ValueError: If the package page is missing or lists no link
            for the requested version.
        """
        page = self._get("/{}/".format(canonicalize_name(name).replace(".", "-")))
        if page is None:
            raise ValueError('No package named "{}"'.format(name))

        data = {
            "name": name,
            "version": version,
            "summary": "",
            "requires_dist": [],
            "requires_python": [],
            "digests": [],
        }

        links = list(page.links_for_version(Version.parse(version)))
        if not links:
            # Previously this surfaced as a bare IndexError from links[0].
            raise ValueError(
                'No distribution links found for package "{}" version "{}"'.format(
                    name, version
                )
            )

        urls = {}
        hashes = []
        for link in links:
            if link.is_wheel:
                urls["bdist_wheel"] = link.url
            elif link.filename.endswith(".tar.gz"):
                urls["sdist"] = link.url
            elif link.filename.endswith((".zip", ".bz2")) and "sdist" not in urls:
                # .zip/.bz2 archives never replace an sdist that is already recorded
                urls["sdist"] = link.url

            if link.hash_name == "sha256":
                hashes.append(link.hash)

        data["digests"] = hashes

        if not urls:
            # No link is a wheel or a recognised sdist archive, so there is
            # nothing to inspect. (The loop above already covers every link,
            # including the first one.)
            return data

        info = self._get_info_from_urls(urls)

        data["summary"] = info["summary"]
        data["requires_dist"] = info["requires_dist"]
        data["requires_python"] = info["requires_python"]

        return data

    def _download(self, url, dest):  # type: (str, str) -> None
        """Stream ``url`` into the file ``dest`` using the repository session."""
        r = self._session.get(url, stream=True)
        with open(dest, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

    def _get(self, endpoint):  # type: (str) -> Union[Page, None]
        """Fetch ``endpoint`` and wrap it in a ``Page``; ``None`` on a 404."""
        url = self._url + endpoint
        response = self._session.get(url)
        if response.status_code == 404:
            return

        return Page(url, response.content, response.headers)
class CachedRepository(Repository, ABC):
    """
    Repository base class that keeps release information in a ``CacheManager``.

    Subclasses provide ``_get_release_info``; this class decides whether the
    cached copy can be used.
    """

    CACHE_VERSION = parse_constraint("1.0.0")

    def __init__(self, name: str, disable_cache: bool = False) -> None:
        super().__init__(name)
        self._disable_cache = disable_cache
        self._cache_dir = REPOSITORY_CACHE_DIR / name

        store_settings = {
            "releases": {"driver": "file", "path": str(self._cache_dir)},
            "packages": {"driver": "dict"},
            "matches": {"driver": "dict"},
        }
        self._cache = CacheManager(
            {"default": "releases", "serializer": "json", "stores": store_settings}
        )

    @abstractmethod
    def _get_release_info(self, name: str, version: str) -> dict[str, Any]:
        """Produce the raw release information; implemented by subclasses."""
        raise NotImplementedError()

    def get_release_info(self, name: str, version: str) -> PackageInfo:
        """
        Return the release information given a package name and a version.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        from poetry.inspection.info import PackageInfo

        def fetch() -> dict[str, Any]:
            return self._get_release_info(name, version)

        if self._disable_cache:
            return PackageInfo.load(fetch())

        cache_key = f"{name}:{version}"
        release = self._cache.remember_forever(cache_key, fetch)

        stored_version = release.get("_cache_version", "0.0.0")
        if parse_constraint(stored_version) != self.CACHE_VERSION:
            # The cached entry predates the current cache format: rebuild it.
            self._log(
                f"The cache for {name} {version} is outdated. Refreshing.",
                level="debug",
            )
            release = fetch()
            self._cache.forever(cache_key, release)

        return PackageInfo.load(release)

    def package(
        self,
        name: str,
        version: str,
        extras: list[str] | None = None,
    ) -> Package:
        """Return the ``Package`` built from the release information."""
        release = self.get_release_info(name, version)
        return release.to_package(name=name, extras=extras)
class LegacyRepository(PyPiRepository):
    """
    Repository that resolves candidates, dependencies and hashes through
    pip's own session and a ``PyPIRepository`` / ``Resolver`` pair.
    """

    def __init__(self, name, url):
        if name == 'pypi':
            raise ValueError('The name [pypi] is reserved for repositories')

        self._packages = []
        self._name = name
        self._url = url

        # The pip command object supplies both default options and the session.
        pip_command = get_pip_command()
        options, _ = pip_command.parse_args([])
        self._session = pip_command._build_session(options)
        self._repository = PyPIRepository(options, self._session)

        self._cache_dir = Path(CACHE_DIR) / 'cache' / 'repositories' / name
        stores = {
            'releases': {'driver': 'file', 'path': str(self._cache_dir)},
            'packages': {'driver': 'dict'},
            'matches': {'driver': 'dict'},
        }
        self._cache = CacheManager({
            'default': 'releases',
            'serializer': 'json',
            'stores': stores,
        })

    def find_packages(self, name, constraint=None, extras=None):
        """Return the packages whose version matches ``constraint``."""
        needs_parsing = (
            constraint is not None
            and not isinstance(constraint, BaseConstraint)
        )
        if needs_parsing:
            constraint = VersionParser().parse_constraints(constraint)

        key = f'{name}:{str(constraint)}' if constraint else name

        if self._cache.store('matches').has(key):
            versions = self._cache.store('matches').get(key)
        else:
            versions = []
            for candidate in self._repository.find_all_candidates(name):
                candidate_version = str(candidate.version)
                if candidate_version in versions:
                    # Several candidate files can share one version.
                    continue

                if constraint and not constraint.matches(
                    Constraint('=', candidate_version)
                ):
                    continue

                versions.append(candidate_version)

            self._cache.store('matches').put(key, versions, 5)

        return [self.package(name, v, extras=extras) for v in versions]

    def package(self, name, version, extras=None):  # type: (...) -> poetry.packages.Package
        """
        Retrieve the release information.

        This is a heavy task which takes time.
        We have to download a package to get the dependencies.
        We also need to download every file matching this release
        to get the various hashes.

        Note that, this will be cached so the subsequent operations
        should be much faster.
        """
        try:
            position = self._packages.index(
                poetry.packages.Package(name, version, version)
            )
            return self._packages[position]
        except ValueError:
            requested_extras = [] if extras is None else extras

            release = self.get_release_info(name, version)
            result = poetry.packages.Package(name, version, version)

            for requirement in release['requires_dist']:
                try:
                    dependency = dependency_from_pep_508(requirement)
                except InvalidMarker:
                    # Invalid marker
                    # We strip the markers hoping for the best
                    requirement = requirement.split(';')[0]
                    dependency = dependency_from_pep_508(requirement)

                if dependency.extras:
                    for extra_name in dependency.extras:
                        if extra_name not in result.extras:
                            result.extras[extra_name] = []
                        result.extras[extra_name].append(dependency)

                if not dependency.is_optional():
                    result.requires.append(dependency)

            # Adding description
            result.description = release.get('summary', '')

            # Adding hashes information
            result.hashes = release['digests']

            # Activate extra dependencies
            for extra_name in requested_extras:
                if extra_name not in result.extras:
                    continue

                for extra_dependency in result.extras[extra_name]:
                    extra_dependency.activate()

                result.requires += result.extras[extra_name]

            self._packages.append(result)

            return result

    def get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Return the release information given a package name and a version.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        releases = self._cache.store('releases')
        return releases.remember_forever(
            f'{name}:{version}',
            lambda: self._get_release_info(name, version)
        )

    def _get_release_info(self, name, version):  # type: (str, str) -> dict
        """Collect dependencies and hashes for ``name==version`` via pip."""
        from pip.req import InstallRequirement
        from pip.exceptions import InstallationError

        ireq = InstallRequirement.from_line(f'{name}=={version}')
        dependency_cache = DependencyCache(self._cache_dir.as_posix())
        resolver = Resolver([ireq], self._repository, cache=dependency_cache)

        try:
            found = list(resolver._iter_dependencies(ireq))
        except (InstallationError, RequirementParseError):
            # setup.py egg-info error most likely
            # So we assume no dependencies
            found = []

        requires = []
        for dep in found:
            specifier = str(dep.req.specifier)
            entry = f'{dep.name}'
            if specifier:
                entry += f' ({specifier})'
            requires.append(entry)

        # Each resolved hash is split on ':' and only the second part is kept.
        resolved = resolver.resolve_hashes([ireq])[ireq]
        digests = [item.split(':')[1] for item in resolved]

        data = {
            'name': name,
            'version': version,
            'summary': '',
            'requires_dist': requires,
            'digests': digests
        }

        resolver.repository.freshen_build_caches()

        return data
class PyPiRepository(Repository):
    """
    Repository reading package and release information from the JSON
    endpoints under ``url`` (``pypi/<name>/json``), optionally falling back
    to downloading and inspecting archives when no dependencies are listed.
    """

    def __init__(self, url="https://pypi.org/", disable_cache=False, fallback=True):
        """
        :param url: Base URL the JSON endpoints are appended to.
        :param disable_cache: When true, package and release information is
            always fetched instead of read from the cache.
        :param fallback: When true, archives are inspected if the JSON
            metadata carries no ``requires_dist``.
        """
        self._name = "PyPI"
        self._url = url
        self._disable_cache = disable_cache
        self._fallback = fallback

        release_cache_dir = Path(CACHE_DIR) / "cache" / "repositories" / "pypi"
        self._cache = CacheManager(
            {
                "default": "releases",
                "serializer": "json",
                "stores": {
                    "releases": {"driver": "file", "path": str(release_cache_dir)},
                    "packages": {"driver": "dict"},
                },
            }
        )

        self._session = CacheControl(
            session(), cache=FileCache(str(release_cache_dir / "_http"))
        )

        super(PyPiRepository, self).__init__()

    def find_packages(
        self,
        name,  # type: str
        constraint=None,  # type: Union[VersionConstraint, str, None]
        extras=None,  # type: Union[list, None]
        allow_prereleases=False,  # type: bool
    ):  # type: (...) -> List[Package]
        """
        Find packages on the remote server.
        """
        if constraint is None:
            constraint = "*"

        if not isinstance(constraint, VersionConstraint):
            constraint = parse_constraint(constraint)

        info = self.get_package_info(name)

        packages = []

        for version, release in info["releases"].items():
            if not release:
                # Bad release
                self._log(
                    "No release information found for {}-{}, skipping".format(
                        name, version
                    ),
                    level="debug",
                )
                continue

            package = Package(name, version)

            if (
                package.is_prerelease()
                and not allow_prereleases
                and not constraint.allows(package.version)
            ):
                continue

            if not constraint or constraint.allows(package.version):
                if extras is not None:
                    package.requires_extras = extras

                packages.append(package)

        self._log(
            "{} packages found for {} {}".format(len(packages), name, str(constraint)),
            level="debug",
        )

        return packages

    def package(
        self,
        name,  # type: str
        version,  # type: str
        extras=None,  # type: (Union[list, None])
    ):  # type: (...) -> Union[Package, None]
        """
        Build a ``Package`` from the release information of ``name`` /
        ``version``, activating the dependencies of the requested ``extras``.
        """
        if extras is None:
            extras = []

        release_info = self.get_release_info(name, version)
        package = Package(name, version, version)
        requires_dist = release_info["requires_dist"] or []
        for req in requires_dist:
            try:
                dependency = dependency_from_pep_508(req)
            except InvalidMarker:
                # Invalid marker
                # We strip the markers hoping for the best
                req = req.split(";")[0]

                dependency = dependency_from_pep_508(req)
            except ValueError:
                # Likely unable to parse constraint so we skip it
                self._log(
                    "Invalid constraint ({}) found in {}-{} dependencies, "
                    "skipping".format(req, package.name, package.version),
                    level="debug",
                )
                continue

            if dependency.extras:
                for extra in dependency.extras:
                    if extra not in package.extras:
                        package.extras[extra] = []

                    package.extras[extra].append(dependency)

            if not dependency.is_optional():
                package.requires.append(dependency)

        # Adding description
        package.description = release_info.get("summary", "")

        if release_info["requires_python"]:
            package.python_versions = release_info["requires_python"]

        if release_info["platform"]:
            package.platform = release_info["platform"]

        # Adding hashes information
        package.hashes = release_info["digests"]

        # Activate extra dependencies
        for extra in extras:
            if extra in package.extras:
                for dep in package.extras[extra]:
                    dep.activate()

                package.requires += package.extras[extra]

        return package

    def search(self, query, mode=0):
        """
        Search through the XML-RPC ``search`` call, matching on the name and,
        when ``mode`` is ``SEARCH_FULLTEXT``, also on the summary.
        """
        results = []

        search = {"name": query}

        if mode == self.SEARCH_FULLTEXT:
            search["summary"] = query

        client = ServerProxy("https://pypi.python.org/pypi")
        hits = client.search(search, "or")

        for hit in hits:
            result = Package(hit["name"], hit["version"], hit["version"])
            result.description = to_str(hit["summary"])
            results.append(result)

        return results

    def get_package_info(self, name):  # type: (str) -> dict
        """
        Return the package information given its name.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        if self._disable_cache:
            return self._get_package_info(name)

        return self._cache.store("packages").remember_forever(
            name, lambda: self._get_package_info(name)
        )

    def _get_package_info(self, name):  # type: (str) -> dict
        """
        Fetch ``pypi/<name>/json``.

        :raises ValueError: If the endpoint answers 404.
        """
        data = self._get("pypi/{}/json".format(name))
        if data is None:
            raise ValueError("Package [{}] not found.".format(name))

        return data

    def get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Return the release information given a package name and a version.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        if self._disable_cache:
            return self._get_release_info(name, version)

        return self._cache.remember_forever(
            "{}:{}".format(name, version), lambda: self._get_release_info(name, version)
        )

    def _get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Fetch ``pypi/<name>/<version>/json`` and turn it into the release
        information dictionary, inspecting archives when ``fallback`` is on
        and the metadata lists no dependencies.

        :raises ValueError: If the endpoint answers 404.
        """
        self._log("Getting info for {} ({}) from PyPI".format(name, version), "debug")

        json_data = self._get("pypi/{}/{}/json".format(name, version))
        if json_data is None:
            raise ValueError("Package [{}] not found.".format(name))

        info = json_data["info"]
        data = {
            "name": info["name"],
            "version": info["version"],
            "summary": info["summary"],
            "platform": info["platform"],
            "requires_dist": info["requires_dist"],
            "requires_python": info["requires_python"],
            "digests": [],
            "_fallback": False,
        }

        try:
            version_info = json_data["releases"][version]
        except KeyError:
            version_info = []

        for file_info in version_info:
            data["digests"].append(file_info["digests"]["sha256"])

        if self._fallback and data["requires_dist"] is None:
            self._log("No dependencies found, downloading archives", level="debug")
            # No dependencies set (along with other information)
            # This might be due to actually no dependencies
            # or badly set metadata when uploading
            # So, we need to make sure there is actually no
            # dependencies by introspecting packages
            urls = {}
            for url in json_data["urls"]:
                # Only get sdist and universal wheels if they exist
                dist_type = url["packagetype"]
                if dist_type not in ["sdist", "bdist_wheel"]:
                    continue

                if dist_type == "sdist":
                    # Keep the first sdist. (The key checked here used to be
                    # misspelled "dist", so the check never matched.)
                    if "sdist" not in urls:
                        urls["sdist"] = url["url"]

                    continue

                if "bdist_wheel" in urls:
                    continue

                # If bdist_wheel, check if it's universal
                python_version = url["python_version"]
                if python_version not in ["py2.py3", "py3", "py2"]:
                    continue

                parts = urlparse.urlparse(url["url"])
                filename = os.path.basename(parts.path)

                if "-none-any" not in filename:
                    continue

                # The wheel passed every check: record it. Without this the
                # filtering above had no effect at all.
                urls["bdist_wheel"] = url["url"]

            if not urls:
                # If we don't have urls, we try to take the first one
                # we find and go from there
                if not json_data["urls"]:
                    return data

                for url in json_data["urls"]:
                    # Only get sdist and universal wheels if they exist
                    dist_type = url["packagetype"]

                    if dist_type != "bdist_wheel":
                        continue

                    urls[url["packagetype"]] = url["url"]

                    break

            if not urls:
                return data

            info = self._get_info_from_urls(urls)

            data["requires_dist"] = info["requires_dist"]
            if not data["requires_python"]:
                data["requires_python"] = info["requires_python"]

        return data

    def _get(self, endpoint):  # type: (str) -> Union[dict, None]
        """Return the decoded JSON of ``endpoint``, or ``None`` on a 404."""
        json_response = self._session.get(self._url + endpoint)
        if json_response.status_code == 404:
            return None

        json_data = json_response.json()

        return json_data

    def _get_info_from_urls(
        self, urls
    ):  # type: (Dict[str, str]) -> Dict[str, Union[str, List, None]]
        """Inspect the wheel when one is available, the sdist otherwise."""
        if "bdist_wheel" in urls:
            return self._get_info_from_wheel(urls["bdist_wheel"])

        return self._get_info_from_sdist(urls["sdist"])

    def _get_info_from_wheel(
        self, url
    ):  # type: (str) -> Dict[str, Union[str, List, None]]
        """Download the wheel at ``url`` and read its metadata with pkginfo."""
        info = {"summary": "", "requires_python": None, "requires_dist": None}

        filename = os.path.basename(urlparse.urlparse(url).path)

        with temporary_directory() as temp_dir:
            filepath = os.path.join(temp_dir, filename)
            self._download(url, filepath)

            try:
                meta = pkginfo.Wheel(filepath)
            except ValueError:
                # Unable to determine dependencies
                # Assume none
                return info

        if meta.summary:
            info["summary"] = meta.summary or ""

        info["requires_python"] = meta.requires_python

        if meta.requires_dist:
            info["requires_dist"] = meta.requires_dist

        return info

    def _get_info_from_sdist(
        self, url
    ):  # type: (str) -> Dict[str, Union[str, List, None]]
        """
        Download the sdist at ``url`` and read its metadata, unpacking the
        archive to look for an ``.egg-info/requires.txt`` when pkginfo finds
        no dependencies.
        """
        info = {"summary": "", "requires_python": None, "requires_dist": None}

        filename = os.path.basename(urlparse.urlparse(url).path)

        with temporary_directory() as temp_dir:
            filepath = Path(temp_dir) / filename
            self._download(url, str(filepath))

            try:
                meta = pkginfo.SDist(str(filepath))
                if meta.summary:
                    info["summary"] = meta.summary

                if meta.requires_python:
                    info["requires_python"] = meta.requires_python

                if meta.requires_dist:
                    info["requires_dist"] = list(meta.requires_dist)

                    return info
            except ValueError:
                # Unable to determine dependencies
                # We pass and go deeper
                pass

            # Still not dependencies found
            # So, we unpack and introspect
            suffix = filepath.suffix
            gz = None
            if suffix == ".zip":
                tar = zipfile.ZipFile(str(filepath))
            else:
                if suffix == ".bz2":
                    gz = BZ2File(str(filepath))
                else:
                    gz = GzipFile(str(filepath))

                tar = tarfile.TarFile(str(filepath), fileobj=gz)

            try:
                # NOTE(review): extractall() on a downloaded archive trusts the
                # member paths it contains — confirm this is acceptable here.
                tar.extractall(os.path.join(temp_dir, "unpacked"))
            finally:
                if gz:
                    gz.close()

                tar.close()

            unpacked = Path(temp_dir) / "unpacked"
            # str.rstrip() strips a *set of characters*, not a suffix, so the
            # archive extension is removed explicitly instead.
            sdist_dir = unpacked / self._strip_archive_extension(Path(filename).name)

            # Checking for .egg-info at root, then in sub directories
            for pattern in ("*.egg-info", "**/*.egg-info"):
                eggs = list(sdist_dir.glob(pattern))
                if not eggs:
                    continue

                requires = eggs[0] / "requires.txt"
                if requires.exists():
                    with requires.open() as f:
                        info["requires_dist"] = parse_requires(f.read())

                    return info

        # Still nothing, assume no dependencies
        # We could probably get them by executing
        # python setup.py egg-info but I don't feel
        # comfortable executing a file just for the sake
        # of getting dependencies.
        return info

    @staticmethod
    def _strip_archive_extension(filename):  # type: (str) -> str
        """Return ``filename`` without its trailing archive extension, if any."""
        for extension in (".tar.gz", ".tar.bz2", ".tgz", ".zip", ".bz2", ".gz"):
            if filename.endswith(extension):
                return filename[: -len(extension)]

        return filename

    def _inspect_sdist_with_setup(self, sdist_dir):
        """
        Run ``python setup.py egg_info`` inside ``sdist_dir`` and read the
        resulting metadata. Any failure yields the empty ``info`` dictionary.
        """
        info = {"requires_python": None, "requires_dist": None}

        setup = sdist_dir / "setup.py"
        if not setup.exists():
            return info

        venv = Venv.create(NullIO())

        current_dir = os.getcwd()
        os.chdir(sdist_dir.as_posix())

        try:
            venv.run("python", "setup.py", "egg_info")

            egg_info = list(sdist_dir.glob("**/*.egg-info"))[0]

            meta = pkginfo.UnpackedSDist(str(egg_info))
            if meta.requires_python:
                info["requires_python"] = meta.requires_python

            if meta.requires_dist:
                info["requires_dist"] = list(meta.requires_dist)
            else:
                requires = egg_info / "requires.txt"
                if requires.exists():
                    with requires.open() as f:
                        info["requires_dist"] = parse_requires(f.read())
        except Exception:
            # Deliberately best-effort: a failing setup.py means "no information".
            pass
        finally:
            # Always restore the working directory, even on KeyboardInterrupt.
            os.chdir(current_dir)

        return info

    def _download(self, url, dest):  # type: (str, str) -> None
        """Stream ``url`` into the file ``dest``."""
        r = get(url, stream=True)
        with open(dest, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

    def _log(self, msg, level="info"):
        """Log ``msg`` prefixed with the repository name at ``level``."""
        getattr(logger, level)("<comment>{}:</comment> {}".format(self._name, msg))
class LegacyRepository(PyPiRepository):
    """
    Repository that reads package pages from a custom index URL, using the
    session of an ``Authenticator`` for credentials and certificates.
    """

    def __init__(
        self,
        name: str,
        url: str,
        config: Optional[Config] = None,
        disable_cache: bool = False,
        cert: Optional[Path] = None,
        client_cert: Optional[Path] = None,
    ) -> None:
        if name == "pypi":
            raise ValueError("The name [pypi] is reserved for repositories")

        self._packages = []
        self._name = name
        self._url = url.rstrip("/")
        self._client_cert = client_cert
        self._cert = cert
        self._cache_dir = REPOSITORY_CACHE_DIR / name

        cache_stores = {
            "releases": {"driver": "file", "path": str(self._cache_dir)},
            "packages": {"driver": "dict"},
            "matches": {"driver": "dict"},
        }
        self._cache = CacheManager(
            {"default": "releases", "serializer": "json", "stores": cache_stores}
        )

        self._authenticator = Authenticator(
            config=config or Config(use_environment=True)
        )

        http_cache = FileCache(str(self._cache_dir / "_http"))
        self._session = CacheControl(self._authenticator.session, cache=http_cache)

        # Basic auth is only configured when both parts of the credentials exist.
        username, password = self._authenticator.get_credentials_for_url(self._url)
        if not (username is None or password is None):
            self._authenticator.session.auth = requests.auth.HTTPBasicAuth(
                username, password
            )

        if self._cert:
            self._authenticator.session.verify = str(self._cert)

        if self._client_cert:
            self._authenticator.session.cert = str(self._client_cert)

        self._disable_cache = disable_cache

    @property
    def cert(self) -> Optional[Path]:
        """Path passed as ``cert`` at construction time, if any."""
        return self._cert

    @property
    def client_cert(self) -> Optional[Path]:
        """Path passed as ``client_cert`` at construction time, if any."""
        return self._client_cert

    @property
    def authenticated_url(self) -> str:
        """Return the repository URL with URL-quoted credentials embedded."""
        if not self._session.auth:
            return self.url

        parts = urllib.parse.urlparse(self.url)
        user = quote(self._session.auth.username, safe="")
        secret = quote(self._session.auth.password, safe="")

        return f"{parts.scheme}://{user}:{secret}@{parts.netloc}{parts.path}"

    def find_packages(self, dependency: "Dependency") -> List[Package]:
        """
        Return the packages of this repository that satisfy ``dependency``.

        Unstable versions are skipped unless the dependency allows them or one
        of the constraint bounds is itself unstable. When the constraint
        accepts any version and nothing else matched, the skipped versions are
        returned instead.
        """
        found = []

        constraint = dependency.constraint
        if constraint is None:
            constraint = "*"
        if not isinstance(constraint, VersionConstraint):
            constraint = parse_constraint(constraint)

        allow_prereleases = dependency.allows_prereleases()
        if isinstance(constraint, VersionRange):
            unstable_bound = (
                constraint.max is not None and constraint.max.is_unstable()
            ) or (constraint.min is not None and constraint.min.is_unstable())
            if unstable_bound:
                allow_prereleases = True

        if constraint.is_any():
            key = dependency.name
        else:
            key = f"{dependency.name}:{constraint!s}"

        skipped_unstable = []

        if self._cache.store("matches").has(key):
            versions = self._cache.store("matches").get(key)
        else:
            page = self._get_page(f"/{dependency.name.replace('.', '-')}/")
            if page is None:
                return []

            versions = []
            for candidate in page.versions:
                if candidate.is_unstable() and not allow_prereleases:
                    # we need this when all versions of the package are pre-releases
                    if constraint.is_any():
                        skipped_unstable.append(candidate)
                elif constraint.allows(candidate):
                    versions.append(candidate)

            self._cache.store("matches").put(key, versions, 5)

        for batch in (versions, skipped_unstable):
            found.extend(
                Package(
                    dependency.name,
                    candidate,
                    source_type="legacy",
                    source_reference=self.name,
                    source_url=self._url,
                )
                for candidate in batch
            )

            self._log(
                f"{len(found)} packages found for {dependency.name} {constraint!s}",
                level="debug",
            )

            # we have matching packages, or constraint is not (*)
            if found or not constraint.is_any():
                break

        return found

    def package(
        self, name: str, version: str, extras: Optional[List[str]] = None
    ) -> Package:
        """
        Retrieve the release information.

        This is a heavy task which takes time.
        We have to download a package to get the dependencies.
        We also need to download every file matching this release
        to get the various hashes.

        Note that this will be cached so the subsequent operations
        should be much faster.
        """
        try:
            position = self._packages.index(Package(name, version, version))
            return self._packages[position]
        except ValueError:
            result = super().package(name, version, extras)
            result._source_type = "legacy"
            result._source_url = self._url
            result._source_reference = self.name

            return result

    def find_links_for_package(self, package: Package) -> List[Link]:
        """Return the links the package page lists for ``package.version``."""
        page = self._get_page(f"/{package.name.replace('.', '-')}/")
        if page is None:
            return []

        return list(page.links_for_version(package.version))

    def _get_release_info(self, name: str, version: str) -> dict:
        """
        Build the release information dictionary for ``name`` / ``version``.

        Files without a hash, or with a hash that is not sha256/sha384/sha512
        but is known to ``hashlib``, are downloaded so a sha256 can be computed.

        :raises PackageNotFound: If the package page is missing or lists no
            link for the requested version.
        """
        page = self._get_page(f"/{canonicalize_name(name).replace('.', '-')}/")
        if page is None:
            raise PackageNotFound(f'No package named "{name}"')

        data = PackageInfo(
            name=name,
            version=version,
            summary="",
            platform=None,
            requires_dist=[],
            requires_python=None,
            files=[],
            cache_version=str(self.CACHE_VERSION),
        )

        links = list(page.links_for_version(Version.parse(version)))
        if not links:
            raise PackageNotFound(
                f'No valid distribution links found for package: "{name}" version: "{version}"'
            )

        sdist_suffixes = (".tar.gz", ".zip", ".bz2", ".xz", ".Z", ".tar")
        urls = defaultdict(list)
        files = []
        for link in links:
            if link.is_wheel:
                urls["bdist_wheel"].append(link.url)
            elif link.filename.endswith(sdist_suffixes):
                urls["sdist"].append(link.url)

            if link.hash:
                file_hash = f"{link.hash_name}:{link.hash}"
            else:
                file_hash = None

            must_rehash = not link.hash or (
                link.hash_name not in ("sha256", "sha384", "sha512")
                and hasattr(hashlib, link.hash_name)
            )
            if must_rehash:
                with temporary_directory() as temp_dir:
                    target = Path(temp_dir) / link.filename
                    self._download(link.url, str(target))

                    if link.hash_name:
                        known_hash = getattr(hashlib, link.hash_name)()
                    else:
                        known_hash = None
                    required_hash = hashlib.sha256()

                    chunksize = 4096
                    with target.open("rb") as stream:
                        for chunk in iter(lambda: stream.read(chunksize), b""):
                            if known_hash:
                                known_hash.update(chunk)
                            required_hash.update(chunk)

                    # Only replace the hash when the advertised one (if any)
                    # matches the downloaded content.
                    if not known_hash or known_hash.hexdigest() == link.hash:
                        file_hash = f"{required_hash.name}:{required_hash.hexdigest()}"

            files.append({"file": link.filename, "hash": file_hash})

        data.files = files

        info = self._get_info_from_urls(urls)
        data.summary = info.summary
        data.requires_dist = info.requires_dist
        data.requires_python = info.requires_python

        return data.asdict()

    def _get_page(self, endpoint: str) -> Optional[Page]:
        """
        Fetch ``endpoint`` relative to the repository URL.

        :return: A ``Page`` for the response, or ``None`` when the server
            answers 401, 403 or 404.
        :raises RepositoryError: For any other HTTP error status.
        """
        url = self._url + endpoint
        try:
            response = self.session.get(url)
            status = response.status_code
            if status in (401, 403):
                self._log(
                    f"Authorization error accessing {url}",
                    level="warning",
                )
                return None
            if status == 404:
                return None
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            raise RepositoryError(e)

        if url != response.url:
            self._log(
                f"Response URL {response.url} differs from request URL {url}",
                level="debug",
            )

        return Page(response.url, response.content, response.headers)

    def _download(self, url: str, dest: str) -> None:
        """Download ``url`` to ``dest`` through the repository session."""
        return download_file(url, dest, session=self.session)
class PyPiRepository(Repository):
    """Repository that reads package metadata from a PyPI-style JSON API.

    Package information is kept in an in-memory ``dict`` store and release
    information in the default ``releases`` file store, unless caching is
    disabled.
    """

    def __init__(self, url='https://pypi.org/', disable_cache=False):
        """Configure the endpoint and the caches.

        :param url: Base URL; JSON endpoints are built by appending
            ``pypi/<name>/json`` to it, so it must end with a slash.
        :param disable_cache: When true, every lookup hits the remote server.
        """
        self._url = url
        self._disable_cache = disable_cache

        self._cache = CacheManager({
            'default': 'releases',
            'serializer': 'json',
            'stores': {
                'releases': {
                    'driver': 'file',
                    'path': Path(CACHE_DIR) / 'cache' / 'repositories' / 'pypi'
                },
                'packages': {
                    'driver': 'dict'
                }
            }
        })

        super().__init__()

    def find_packages(self,
                      name: str,
                      constraint: Union[Constraint, str, None] = None,
                      extras: Union[list, None] = None
                      ) -> List[Package]:
        """
        Find packages on the remote server.

        :param name: Name of the package to look up.
        :param constraint: Optional version constraint; anything that is not
            already a ``BaseConstraint`` is parsed with ``VersionParser``.
        :param extras: Extras forwarded to :meth:`package`.
        :return: One package per release whose version matches the constraint
            (every release when no constraint is given).
        """
        if constraint is not None and not isinstance(constraint, BaseConstraint):
            constraint = VersionParser().parse_constraints(constraint)

        info = self.get_package_info(name)

        # Only the version keys are needed; the release payloads are unused.
        versions = [
            version
            for version in info['releases']
            if not constraint or constraint.matches(Constraint('=', version))
        ]

        return [
            self.package(name, version, extras=extras) for version in versions
        ]

    def package(self,
                name: str,
                version: str,
                extras: Union[list, None] = None) -> Package:
        """Return the package for ``name``/``version`` with its dependencies.

        A package already present in ``self._packages`` is returned as is.
        Otherwise it is built from the release information, appended to
        ``self._packages`` and returned.

        :param extras: Names of the extras whose dependencies are activated
            and added to the package requirements.
        """
        try:
            index = self._packages.index(Package(name, version, version))
        except ValueError:
            # Not known yet: build it from the release information below.
            pass
        else:
            return self._packages[index]

        if extras is None:
            extras = []

        release_info = self.get_release_info(name, version)
        package = Package(name, version, version)

        # ``requires_dist`` can be None; treat that as "no dependencies"
        # instead of failing on the iteration.
        for line in release_info['requires_dist'] or []:
            req = InstallRequirement.from_line(line)

            dependency = Dependency(
                req.name,
                str(req.req.specifier),
                optional=req.markers
            )

            is_extra = False
            if req.markers:
                # Setting extra dependencies and requirements
                requirements = self._convert_markers(req.markers._markers)

                if 'python_version' in requirements:
                    dependency.python_versions = self._join_marker_groups(
                        requirements['python_version']
                    )

                if 'sys_platform' in requirements:
                    dependency.platform = self._join_marker_groups(
                        requirements['sys_platform']
                    )

                if 'extra' in requirements:
                    is_extra = True
                    for group in requirements['extra']:
                        for _, extra in group:
                            if extra not in package.extras:
                                package.extras[extra] = []

                            package.extras[extra].append(dependency)

            if not is_extra:
                package.requires.append(dependency)

        # Adding description
        package.description = release_info.get('summary', '')

        # Adding hashes information
        package.hashes = release_info['digests']

        # Activate extra dependencies
        for extra in extras:
            if extra in package.extras:
                for dep in package.extras[extra]:
                    dep.activate()

                package.requires += package.extras[extra]

        self._packages.append(package)

        return package

    @staticmethod
    def _join_marker_groups(groups):
        """Render converted marker groups as a constraint string.

        Each group is a list of ``(operator, value)`` pairs; pairs are joined
        with a space and groups with ``' || '``.
        """
        return ' || '.join(
            ' '.join(f'{op}{value}' for op, value in group)
            for group in groups
        )

    def search(self, query, mode=0):
        """Search the remote index through its XML-RPC ``search`` method.

        :param query: Text matched against the package name and, when ``mode``
            equals ``self.SEARCH_FULLTEXT``, against the summary as well.
        :return: A list of dicts with ``name``, ``description`` and
            ``version`` keys.
        """
        search = {'name': query}

        if mode == self.SEARCH_FULLTEXT:
            search['summary'] = query

        client = ServerProxy(self._url)
        hits = client.search(search, 'or')

        return [
            {
                'name': hit['name'],
                'description': hit['summary'],
                'version': hit['version']
            }
            for hit in hits
        ]

    def get_package_info(self, name: str) -> dict:
        """
        Return the package information given its name.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        if self._disable_cache:
            return self._get_package_info(name)

        return self._cache.store('packages').remember_forever(
            f'{name}',
            lambda: self._get_package_info(name)
        )

    def _get_package_info(self, name: str) -> dict:
        """Fetch the package JSON document.

        :raises ValueError: If the server answers 404 for the package.
        """
        data = self._get(self._url + f'pypi/{name}/json')
        if data is None:
            raise ValueError(f'Package [{name}] not found.')

        return data

    def get_release_info(self, name: str, version: str) -> dict:
        """
        Return the release information given a package name and a version.

        The information is returned from the cache if it exists
        or retrieved from the remote server.
        """
        if self._disable_cache:
            return self._get_release_info(name, version)

        return self._cache.remember_forever(
            f'{name}:{version}',
            lambda: self._get_release_info(name, version)
        )

    def _get_release_info(self, name: str, version: str) -> dict:
        """Fetch one release and reduce it to the fields used by ``package``.

        :return: A dict with ``name``, ``version``, ``summary``, ``platform``,
            ``requires_dist``, ``requires_python`` and ``digests`` (the sha256
            digest of every file of the release).
        :raises ValueError: If the server answers 404 for the release.
        """
        json_data = self._get(self._url + f'pypi/{name}/{version}/json')
        if json_data is None:
            raise ValueError(f'Package [{name}] not found.')

        info = json_data['info']
        data = {
            'name': info['name'],
            'version': info['version'],
            'summary': info['summary'],
            'platform': info['platform'],
            'requires_dist': info['requires_dist'],
            'requires_python': info['requires_python'],
            'digests': []
        }

        # The requested version may be absent from ``releases``; report no
        # digests in that case rather than raising KeyError.
        for file_info in json_data['releases'].get(version, []):
            data['digests'].append(file_info['digests']['sha256'])

        return data

    def _get(self, url: str) -> Union[dict, None]:
        """GET ``url`` and decode the JSON body; return None on a 404."""
        json_response = get(url)
        if json_response.status_code == 404:
            return None

        return json_response.json()

    def _group_markers(self, markers):
        """Split a parsed marker list into "or"-separated groups.

        Nested lists are grouped recursively, tuples are reduced to
        ``(lhs.value, op, rhs.value)``, and an ``"or"`` string starts a new
        group. ``"and"`` keeps appending to the current group.
        """
        groups = [[]]

        for marker in markers:
            assert isinstance(marker, (list, tuple, str))

            if isinstance(marker, list):
                groups[-1].append(self._group_markers(marker))
            elif isinstance(marker, tuple):
                lhs, op, rhs = marker
                groups[-1].append((lhs.value, op, rhs.value))
            else:
                assert marker in ["and", "or"]
                if marker == "or":
                    groups.append([])

        return groups

    def _convert_markers(self, markers):
        """Map each marker variable to its groups of ``(op, value)`` pairs.

        Only the first "or" group produced by :meth:`_group_markers` is
        walked. Entries found inside a nested group open a new alternative
        for a variable that has already been seen.
        """
        groups = self._group_markers(markers)[0]

        requirements = {}

        def _group(_groups, or_=False):
            # ``requirements`` is only mutated, never rebound, so the closure
            # needs no ``nonlocal`` declaration.
            for group in _groups:
                if isinstance(group, tuple):
                    variable, op, value = group
                    group_name = str(variable)
                    if group_name not in requirements:
                        requirements[group_name] = [[]]
                    elif or_:
                        requirements[group_name].append([])

                    requirements[group_name][-1].append((str(op), str(value)))
                else:
                    _group(group, or_=True)

        _group(groups)

        return requirements
class PyPiRepository(RemoteRepository):
    """Repository reading package metadata from the PyPI JSON endpoints.

    Release information is cached in the default ``releases`` file store and
    package information in an in-memory ``dict`` store. HTTP responses go
    through a ``CacheControl`` session backed by a ``FileCache``.
    """

    CACHE_VERSION = parse_constraint("1.0.0")

    def __init__(self, url="https://pypi.org/", disable_cache=False, fallback=True):
        """Set up the endpoint, the caches and the HTTP session.

        :param url: Base URL used for the JSON and search endpoints; the
            parent class receives it with ``/simple/`` appended.
        :param disable_cache: Bypass the release and package caches.
        :param fallback: Inspect distribution archives when a release
            declares no ``requires_dist``.
        """
        super(PyPiRepository, self).__init__(url.rstrip("/") + "/simple/")

        self._base_url = url
        self._disable_cache = disable_cache
        self._fallback = fallback

        release_cache_dir = REPOSITORY_CACHE_DIR / "pypi"
        cache_config = {
            "default": "releases",
            "serializer": "json",
            "stores": {
                "releases": {"driver": "file", "path": str(release_cache_dir)},
                "packages": {"driver": "dict"},
            },
        }
        self._cache = CacheManager(cache_config)

        self._cache_control_cache = FileCache(str(release_cache_dir / "_http"))
        self._session = CacheControl(
            requests.session(), cache=self._cache_control_cache
        )

        self._name = "PyPI"

    @property
    def session(self):
        """The cache-controlled HTTP session used for every request."""
        return self._session

    def find_packages(self, dependency):  # type: (Dependency) -> List[Package]
        """
        Return the packages on the remote server that satisfy ``dependency``.

        Pre-releases are skipped unless the dependency allows them or one of
        the bounds of a ``VersionRange`` constraint is itself a pre-release.
        When nothing matches an "any" constraint, the skipped pre-releases
        are returned instead.
        """
        constraint = dependency.constraint
        if constraint is None:
            constraint = "*"

        if not isinstance(constraint, VersionConstraint):
            constraint = parse_constraint(constraint)

        allow_prereleases = dependency.allows_prereleases()
        if isinstance(constraint, VersionRange) and (
            (constraint.max is not None and constraint.max.is_prerelease())
            or (constraint.min is not None and constraint.min.is_prerelease())
        ):
            allow_prereleases = True

        try:
            info = self.get_package_info(dependency.name)
        except PackageNotFound:
            self._log(
                "No packages found for {} {}".format(dependency.name, str(constraint)),
                level="debug",
            )
            return []

        matches = []
        skipped_prereleases = []

        for version, release in info["releases"].items():
            if not release:
                # Bad release
                self._log(
                    "No release information found for {}-{}, skipping".format(
                        dependency.name, version
                    ),
                    level="debug",
                )
                continue

            try:
                candidate = Package(info["info"]["name"], version)
            except ParseVersionError:
                self._log(
                    'Unable to parse version "{}" for the {} package, skipping'.format(
                        version, dependency.name
                    ),
                    level="debug",
                )
                continue

            if candidate.is_prerelease() and not allow_prereleases:
                if constraint.is_any():
                    # Needed when every version of the package is a
                    # pre-release.
                    skipped_prereleases.append(candidate)
                continue

            if not constraint or constraint.allows(candidate.version):
                matches.append(candidate)

        self._log(
            "{} packages found for {} {}".format(
                len(matches), dependency.name, str(constraint)
            ),
            level="debug",
        )

        return matches or skipped_prereleases

    def package(
        self,
        name,  # type: str
        version,  # type: str
        extras=None,  # type: (Union[list, None])
    ):  # type: (...) -> Package
        """Build the package for ``name``/``version`` from its release info."""
        release = self.get_release_info(name, version)
        return release.to_package(name=name, extras=extras)

    def search(self, query):
        """Query the HTML search page and return the parsable results.

        Snippets without a name or version are ignored; snippets whose
        version cannot be parsed are logged and skipped.
        """
        found = []

        response = requests.session().get(
            self._base_url + "search", params={"q": query}
        )
        content = parse(response.content, namespaceHTMLElements=False)

        for snippet in content.findall(".//*[@class='package-snippet']"):
            name = snippet.find("h3/*[@class='package-snippet__name']").text
            version = snippet.find("h3/*[@class='package-snippet__version']").text

            if not name or not version:
                continue

            description = (
                snippet.find("p[@class='package-snippet__description']").text or ""
            )

            try:
                hit = Package(name, version, description)
                hit.description = to_str(description.strip())
                found.append(hit)
            except ParseVersionError:
                self._log(
                    'Unable to parse version "{}" for the {} package, skipping'.format(
                        version, name
                    ),
                    level="debug",
                )

        return found

    def get_package_info(self, name):  # type: (str) -> dict
        """
        Return the package information for ``name``.

        Served from the in-memory ``packages`` store when available, fetched
        from the remote server otherwise.
        """
        if self._disable_cache:
            return self._get_package_info(name)

        store = self._cache.store("packages")
        return store.remember_forever(name, lambda: self._get_package_info(name))

    def _get_package_info(self, name):  # type: (str) -> dict
        """Fetch the package JSON document; raise PackageNotFound on a 404."""
        data = self._get("pypi/{}/json".format(name))
        if data is not None:
            return data

        raise PackageNotFound("Package [{}] not found.".format(name))

    def get_release_info(self, name, version):  # type: (str, str) -> PackageInfo
        """
        Return the release information for ``name`` at ``version``.

        Served from the cache when available. A cached entry whose
        ``_cache_version`` differs from ``CACHE_VERSION`` is fetched again
        and overwritten.
        """
        if self._disable_cache:
            return PackageInfo.load(self._get_release_info(name, version))

        cache_key = "{}:{}".format(name, version)
        cached = self._cache.remember_forever(
            cache_key, lambda: self._get_release_info(name, version)
        )

        cache_version = cached.get("_cache_version", "0.0.0")
        if parse_constraint(cache_version) != self.CACHE_VERSION:
            # The cache must be updated
            self._log(
                "The cache for {} {} is outdated. Refreshing.".format(name, version),
                level="debug",
            )
            cached = self._get_release_info(name, version)
            self._cache.forever(cache_key, cached)

        return PackageInfo.load(cached)

    def find_links_for_package(self, package):
        """Return one ``Link`` per file of the release, tagged with its sha256.

        An empty list is returned when the release is unknown to the server.
        """
        json_data = self._get("pypi/{}/{}/json".format(package.name, package.version))
        if json_data is None:
            return []

        return [
            Link(
                entry["url"] + "#" + "sha256={}".format(entry["digests"]["sha256"])
            )
            for entry in json_data["urls"]
        ]

    def _get_release_info(self, name, version):  # type: (str, str) -> dict
        """Fetch a release and return it as a ``PackageInfo`` dictionary.

        When the release declares no ``requires_dist`` and fallback is
        enabled, the sdist/wheel archives are inspected to recover the
        dependencies (and ``requires_python`` if it is missing).
        """
        self._log("Getting info for {} ({}) from PyPI".format(name, version), "debug")

        json_data = self._get("pypi/{}/{}/json".format(name, version))
        if json_data is None:
            raise PackageNotFound("Package [{}] not found.".format(name))

        info = json_data["info"]
        data = PackageInfo(
            name=info["name"],
            version=info["version"],
            summary=info["summary"],
            platform=info["platform"],
            requires_dist=info["requires_dist"],
            requires_python=info["requires_python"],
            files=info.get("files", []),
            cache_version=str(self.CACHE_VERSION),
        )

        try:
            release_files = json_data["releases"][version]
        except KeyError:
            release_files = []

        for entry in release_files:
            data.files.append(
                {
                    "file": entry["filename"],
                    "hash": "sha256:" + entry["digests"]["sha256"],
                }
            )

        if not (self._fallback and data.requires_dist is None):
            return data.asdict()

        self._log("No dependencies found, downloading archives", level="debug")
        # No dependencies set (along with other information). This might be
        # due to actually no dependencies or to badly set metadata when
        # uploading, so the archives are introspected to make sure.
        urls = defaultdict(list)
        for entry in json_data["urls"]:
            # Only get sdist and wheels if they exist
            dist_type = entry["packagetype"]
            if dist_type in ["sdist", "bdist_wheel"]:
                urls[dist_type].append(entry["url"])

        if not urls:
            return data.asdict()

        inspected = self._get_info_from_urls(urls)
        data.requires_dist = inspected.requires_dist

        if not data.requires_python:
            data.requires_python = inspected.requires_python

        return data.asdict()

    def _get(self, endpoint):  # type: (str) -> Union[dict, None]
        """GET ``endpoint`` under the base URL and decode the JSON body.

        Returns None on a 404. On a redirect loop the cached entry for the
        URL is deleted and the request is issued once more.
        """
        target = self._base_url + endpoint
        try:
            json_response = self.session.get(target)
        except requests.exceptions.TooManyRedirects:
            # Cache control redirect loop.
            # We try to remove the cache and try again
            self._cache_control_cache.delete(target)
            json_response = self.session.get(target)

        if json_response.status_code == 404:
            return None

        return json_response.json()

    def _get_info_from_urls(self, urls):  # type: (Dict[str, List[str]]) -> PackageInfo
        """Pick the most informative archive in ``urls`` and inspect it.

        Order of preference: a ``py2.py3`` universal wheel, the merged
        py2 + py3 universal wheels, a py3 then a py2 universal wheel, the
        first platform-specific wheel (only when no sdist exists), and
        finally the first sdist.
        """
        # Checking wheels first as they are more likely to hold
        # the necessary information
        if "bdist_wheel" in urls:
            # Check for a universal wheel
            universal_wheel = None
            universal_python2_wheel = None
            universal_python3_wheel = None
            platform_specific_wheels = []

            for wheel in urls["bdist_wheel"]:
                match = wheel_file_re.match(Link(wheel).filename)
                if not match:
                    continue

                pyver = match.group("pyver")
                abi = match.group("abi")
                plat = match.group("plat")

                if abi != "none" or plat != "any":
                    platform_specific_wheels.append(wheel)
                    continue

                # Universal wheel
                if pyver == "py2.py3":
                    # Any Python
                    universal_wheel = wheel
                elif pyver == "py2":
                    universal_python2_wheel = wheel
                else:
                    universal_python3_wheel = wheel

            if universal_wheel is not None:
                return self._get_info_from_wheel(universal_wheel)

            info = None
            if universal_python2_wheel and universal_python3_wheel:
                info = self._get_info_from_wheel(universal_python2_wheel)
                py3_info = self._get_info_from_wheel(universal_python3_wheel)

                if py3_info.requires_dist:
                    if not info.requires_dist:
                        info.requires_dist = py3_info.requires_dist

                        return info

                    py2_requires_dist = {
                        dependency_from_pep_508(r).to_pep_508()
                        for r in info.requires_dist
                    }
                    py3_requires_dist = {
                        dependency_from_pep_508(r).to_pep_508()
                        for r in py3_info.requires_dist
                    }

                    # Normalizing requires_dist: shared requirements are kept
                    # as they are, version-specific ones get a Python marker.
                    requires_dist = list(py2_requires_dist & py3_requires_dist)
                    version_specific = (
                        (py2_requires_dist - py3_requires_dist, "python_version == '2.7'"),
                        (py3_requires_dist - py2_requires_dist, "python_version >= '3'"),
                    )
                    for only_requires_dist, marker in version_specific:
                        for requirement in only_requires_dist:
                            dep = dependency_from_pep_508(requirement)
                            dep.marker = dep.marker.intersect(parse_marker(marker))
                            requires_dist.append(dep.to_pep_508())

                    info.requires_dist = sorted(set(requires_dist))

            if info:
                return info

            # Prefer non platform specific wheels
            if universal_python3_wheel:
                return self._get_info_from_wheel(universal_python3_wheel)

            if universal_python2_wheel:
                return self._get_info_from_wheel(universal_python2_wheel)

            if platform_specific_wheels and "sdist" not in urls:
                # Pick the first wheel available and hope for the best
                return self._get_info_from_wheel(platform_specific_wheels[0])

        return self._get_info_from_sdist(urls["sdist"][0])

    def _get_info_from_wheel(self, url):  # type: (str) -> PackageInfo
        """Download the wheel at ``url`` to a temporary directory and inspect it."""
        wheel_name = urlparse.urlparse(url).path.rsplit("/")[-1]
        self._log("Downloading wheel: {}".format(wheel_name), level="debug")

        filename = os.path.basename(wheel_name)

        with temporary_directory() as temp_dir:
            filepath = Path(temp_dir) / filename
            self._download(url, str(filepath))

            # Inspect before the temporary directory is left.
            return PackageInfo.from_wheel(filepath)

    def _get_info_from_sdist(self, url):  # type: (str) -> PackageInfo
        """Download the sdist at ``url`` to a temporary directory and inspect it."""
        url_path = urlparse.urlparse(url).path
        self._log(
            "Downloading sdist: {}".format(url_path.rsplit("/")[-1]),
            level="debug",
        )

        filename = os.path.basename(url_path)

        with temporary_directory() as temp_dir:
            filepath = Path(temp_dir) / filename
            self._download(url, str(filepath))

            # Inspect before the temporary directory is left.
            return PackageInfo.from_sdist(filepath)

    def _download(self, url, dest):  # type: (str, str) -> None
        """Download ``url`` to ``dest`` through the repository session."""
        return download_file(url, dest, session=self.session)

    def _log(self, msg, level="info"):
        """Log ``msg`` at ``level``, prefixed with the repository name."""
        emit = getattr(logger, level)
        emit("<debug>{}:</debug> {}".format(self._name, msg))
def handle(self) -> int:
    """Clear a repository cache, entirely or for a single release.

    The ``cache`` argument has the form ``<repository>`` or
    ``<repository>:<package>:<version>``:

    * one part: flush the whole repository cache (requires ``--all`` and a
      confirmation);
    * two parts: not supported yet;
    * three parts: forget the entry ``<package>:<version>`` after
      confirmation.

    :return: 0 on success, including when there was nothing to delete or the
        user declined.
    :raises ValueError: If the repository name does not designate a directory
        inside the repository cache directory, or the key has more than
        three parts.
    :raises RuntimeError: If ``--all`` is missing for a whole-cache clear, or
        only a package name was given.
    """
    from cachy import CacheManager

    cache_name = self.argument("cache")

    parts = cache_name.split(":")
    root = parts[0]

    config = Config.create()
    cache_dir = config.repository_cache_directory / root

    # ``relative_to`` is a purely lexical check: it rejects absolute roots
    # but accepts "../..", which would let ``flush`` act outside the cache
    # directory. Reject any parent-directory component as well.
    try:
        relative = cache_dir.relative_to(config.repository_cache_directory)
    except ValueError:
        relative = None

    if relative is None or ".." in relative.parts:
        raise ValueError(f"{root} is not a valid repository cache")

    cache = CacheManager({
        "default": root,
        "serializer": "json",
        "stores": {
            root: {
                "driver": "file",
                "path": str(cache_dir)
            }
        },
    })

    if len(parts) == 1:
        if not self.option("all"):
            raise RuntimeError(
                f"Add the --all option if you want to clear all {root} caches"
            )

        if not os.path.exists(str(cache_dir)):
            self.line(f"No cache entries for {root}")
            return 0

        # Calculate number of entries
        entries_count = sum(
            len(files) for _path, _dirs, files in os.walk(str(cache_dir))
        )

        delete = self.confirm(f"<question>Delete {entries_count} entries?</>")
        if not delete:
            return 0

        cache.flush()
    elif len(parts) == 2:
        raise RuntimeError(
            "Only specifying the package name is not yet supported. "
            "Add a specific version to clear"
        )
    elif len(parts) == 3:
        package = parts[1]
        version = parts[2]

        if not cache.has(f"{package}:{version}"):
            self.line(f"No cache entries for {package}:{version}")
            return 0

        delete = self.confirm(f"Delete cache entry {package}:{version}")
        if not delete:
            return 0

        cache.forget(f"{package}:{version}")
    else:
        raise ValueError("Invalid cache key")

    return 0
class LegacyRepository(PyPiRepository):
    """Repository served as a "simple"-style HTML index.

    Package pages are fetched from ``<url>/<name>/`` and parsed with
    ``Page``. Matching versions are cached in memory, release information in
    a per-repository file store.
    """

    def __init__(
        self, name, url, auth=None, disable_cache=False
    ):  # type: (str, str, Optional[Auth], bool) -> None
        """Configure the repository.

        :param name: Repository name; ``"pypi"`` is reserved.
        :param url: Index URL; trailing slashes are stripped.
        :param auth: Optional credentials, attached to the session unless the
            URL already carries a username.
        :param disable_cache: Stored on the instance for the caching logic.
        :raises ValueError: If ``name`` is ``"pypi"``.
        """
        if name == "pypi":
            raise ValueError("The name [pypi] is reserved for repositories")

        self._packages = []
        self._name = name
        self._url = url.rstrip("/")
        self._auth = auth
        self._inspector = Inspector()
        self._cache_dir = Path(CACHE_DIR) / "cache" / "repositories" / name
        self._cache = CacheManager({
            "default": "releases",
            "serializer": "json",
            "stores": {
                "releases": {
                    "driver": "file",
                    "path": str(self._cache_dir)
                },
                "packages": {
                    "driver": "dict"
                },
                "matches": {
                    "driver": "dict"
                },
            },
        })

        self._session = CacheControl(
            requests.session(),
            cache=FileCache(str(self._cache_dir / "_http")),
        )

        url_parts = urlparse.urlparse(self._url)
        if not url_parts.username and self._auth:
            self._session.auth = self._auth

        self._disable_cache = disable_cache

    @property
    def authenticated_url(self):  # type: () -> str
        """Return the repository URL with the credentials embedded.

        The plain URL is returned when no credentials are configured. The
        username and password are percent-encoded so that characters such as
        ``@``, ``:`` or ``/`` cannot corrupt the authority part of the URL.
        """
        if not self._auth:
            return self.url

        # Local import keeps the block self-contained on Python 2 and 3.
        try:
            from urllib.parse import quote
        except ImportError:  # Python 2
            from urllib import quote

        parsed = urlparse.urlparse(self.url)

        return "{scheme}://{username}:{password}@{netloc}{path}".format(
            scheme=parsed.scheme,
            username=quote(self._auth.auth.username, safe=""),
            password=quote(self._auth.auth.password, safe=""),
            netloc=parsed.netloc,
            path=parsed.path,
        )

    def find_packages(self, name, constraint=None, extras=None, allow_prereleases=False):
        """Return the packages of ``name`` whose version matches ``constraint``.

        Matching versions are read from the ``matches`` store when present;
        otherwise they are computed from the package page and stored there.
        An empty list is returned when the package page does not exist.

        :param constraint: Version constraint; ``None`` means any version.
        :param extras: Assigned to ``requires_extras`` on each package when
            not ``None``.
        :param allow_prereleases: Include pre-releases; forced to true when a
            bound of a ``VersionRange`` constraint is a pre-release.
        """
        packages = []

        if constraint is None:
            constraint = "*"

        if not isinstance(constraint, VersionConstraint):
            constraint = parse_constraint(constraint)

        if isinstance(constraint, VersionRange):
            if (
                constraint.max is not None and constraint.max.is_prerelease()
                or constraint.min is not None and constraint.min.is_prerelease()
            ):
                allow_prereleases = True

        key = name
        if not constraint.is_any():
            key = "{}:{}".format(key, str(constraint))

        matches = self._cache.store("matches")
        if matches.has(key):
            versions = matches.get(key)
        else:
            page = self._get(
                "/{}/".format(canonicalize_name(name).replace(".", "-"))
            )
            if page is None:
                return []

            versions = []
            for version in page.versions:
                if version.is_prerelease() and not allow_prereleases:
                    continue

                if constraint.allows(version):
                    versions.append(version)

            matches.put(key, versions, 5)

        for version in versions:
            package = Package(name, version)
            package.source_type = "legacy"
            package.source_url = self._url

            if extras is not None:
                package.requires_extras = extras

            packages.append(package)

        self._log(
            "{} packages found for {} {}".format(len(packages), name, str(constraint)),
            level="debug",
        )

        return packages

    def package(self, name, version, extras=None):  # type: (...) -> poetry.packages.Package
        """
        Retrieve the release information.

        This is a heavy task which takes time.
        We have to download a package to get the dependencies.
        We also need to download every file matching this release
        to get the various hashes.

        Note that, this will be cached so the subsequent operations
        should be much faster.
        """
        try:
            index = self._packages.index(
                poetry.packages.Package(name, version, version)
            )
        except ValueError:
            # Not known yet: build it from the release information below.
            pass
        else:
            return self._packages[index]

        if extras is None:
            extras = []

        release_info = self.get_release_info(name, version)

        package = poetry.packages.Package(name, version, version)
        if release_info["requires_python"]:
            package.python_versions = release_info["requires_python"]

        package.source_type = "legacy"
        package.source_url = self._url
        package.source_reference = self.name

        requires_dist = release_info["requires_dist"] or []
        for req in requires_dist:
            try:
                dependency = dependency_from_pep_508(req)
            except InvalidMarker:
                # Invalid marker
                # We strip the markers hoping for the best
                req = req.split(";")[0]

                dependency = dependency_from_pep_508(req)
            except ValueError:
                # Likely unable to parse constraint so we skip it
                self._log(
                    "Invalid constraint ({}) found in {}-{} dependencies, "
                    "skipping".format(req, package.name, package.version),
                    level="debug",
                )
                continue

            if dependency.in_extras:
                for extra in dependency.in_extras:
                    if extra not in package.extras:
                        package.extras[extra] = []

                    package.extras[extra].append(dependency)

            if not dependency.is_optional():
                package.requires.append(dependency)

        # Adding description
        package.description = release_info.get("summary", "")

        # Adding hashes information
        package.hashes = release_info["digests"]

        # Activate extra dependencies
        for extra in extras:
            if extra in package.extras:
                for dep in package.extras[extra]:
                    dep.activate()

                package.requires += package.extras[extra]

        self._packages.append(package)

        return package

    def _get_release_info(self, name, version):  # type: (str, str) -> dict
        """Build the release information from the package page.

        Distribution links for ``version`` are collected with their hashes
        (sha256 hashes bare, others prefixed with ``<hash_name>:``), then the
        archives are inspected for summary and requirements.

        :raises PackageNotFound: If the page does not exist or lists no link
            for the version.
        """
        page = self._get("/{}/".format(canonicalize_name(name).replace(".", "-")))
        if page is None:
            raise PackageNotFound('No package named "{}"'.format(name))

        data = {
            "name": name,
            "version": version,
            "summary": "",
            "requires_dist": [],
            "requires_python": None,
            "digests": [],
            "_cache_version": str(self.CACHE_VERSION),
        }

        links = list(page.links_for_version(Version.parse(version)))
        if not links:
            raise PackageNotFound(
                'No valid distribution links found for package: "{}" version: "{}"'
                .format(name, version)
            )

        urls = defaultdict(list)
        hashes = []
        for link in links:
            if link.is_wheel:
                urls["bdist_wheel"].append(link.url)
            elif link.filename.endswith(
                (".tar.gz", ".zip", ".bz2", ".xz", ".Z", ".tar")
            ):
                urls["sdist"].append(link.url)

            # Named ``digest`` so the builtin ``hash`` is not shadowed.
            digest = link.hash
            if link.hash_name == "sha256":
                hashes.append(digest)
            elif digest:
                hashes.append(link.hash_name + ":" + digest)

        data["digests"] = hashes

        info = self._get_info_from_urls(urls)

        data["summary"] = info["summary"]
        data["requires_dist"] = info["requires_dist"]
        data["requires_python"] = info["requires_python"]

        return data

    def _download(self, url, dest):  # type: (str, str) -> None
        """Stream ``url`` to the file ``dest`` through the repository session."""
        response = self._session.get(url, stream=True)
        with open(dest, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

    def _get(self, endpoint):  # type: (str) -> Union[Page, None]
        """Fetch ``endpoint`` under the repository URL as a ``Page``.

        :return: The parsed page, or ``None`` when the server answers 404.
        """
        url = self._url + endpoint
        response = self._session.get(url)
        if response.status_code == 404:
            return None

        return Page(url, response.content, response.headers)
class PyPiRepository(RemoteRepository):
    """Repository reading package metadata from the PyPI JSON endpoints.

    Release information is handled as plain dictionaries, cached in the
    default ``releases`` file store; package information is cached in an
    in-memory ``dict`` store. Archives are inspected with ``Inspector`` when
    a release declares no dependencies.
    """

    CACHE_VERSION = parse_constraint("1.0.0")

    def __init__(self, url="https://pypi.org/", disable_cache=False, fallback=True):
        """Set up the endpoint, the caches, the HTTP session and the inspector.

        :param url: Base URL used for the JSON and search endpoints; the
            parent class receives it with ``/simple/`` appended.
        :param disable_cache: Bypass the release and package caches.
        :param fallback: Inspect distribution archives when a release
            declares no ``requires_dist``.
        """
        super(PyPiRepository, self).__init__(url.rstrip("/") + "/simple/")

        self._base_url = url
        self._disable_cache = disable_cache
        self._fallback = fallback

        release_cache_dir = Path(CACHE_DIR) / "cache" / "repositories" / "pypi"
        cache_config = {
            "default": "releases",
            "serializer": "json",
            "stores": {
                "releases": {
                    "driver": "file",
                    "path": str(release_cache_dir)
                },
                "packages": {
                    "driver": "dict"
                },
            },
        }
        self._cache = CacheManager(cache_config)

        self._cache_control_cache = FileCache(str(release_cache_dir / "_http"))
        self._session = CacheControl(session(), cache=self._cache_control_cache)
        self._inspector = Inspector()

        self._name = "PyPI"

    def find_packages(
        self,
        name,  # type: str
        constraint=None,  # type: Union[VersionConstraint, str, None]
        extras=None,  # type: Union[list, None]
        allow_prereleases=False,  # type: bool
    ):  # type: (...) -> List[Package]
        """
        Return the packages of ``name`` on the remote server matching
        ``constraint``.

        Pre-releases are skipped unless ``allow_prereleases`` is set or one
        of the bounds of a ``VersionRange`` constraint is a pre-release.
        """
        if constraint is None:
            constraint = "*"

        if not isinstance(constraint, VersionConstraint):
            constraint = parse_constraint(constraint)

        if isinstance(constraint, VersionRange) and (
            (constraint.max is not None and constraint.max.is_prerelease())
            or (constraint.min is not None and constraint.min.is_prerelease())
        ):
            allow_prereleases = True

        try:
            info = self.get_package_info(name)
        except PackageNotFound:
            self._log(
                "No packages found for {} {}".format(name, str(constraint)),
                level="debug",
            )
            return []

        matches = []

        for version, release in info["releases"].items():
            if not release:
                # Bad release
                self._log(
                    "No release information found for {}-{}, skipping".format(
                        name, version),
                    level="debug",
                )
                continue

            try:
                candidate = Package(name, version)
            except ParseVersionError:
                self._log(
                    'Unable to parse version "{}" for the {} package, skipping'
                    .format(version, name),
                    level="debug",
                )
                continue

            if candidate.is_prerelease() and not allow_prereleases:
                continue

            if not constraint or constraint.allows(candidate.version):
                if extras is not None:
                    candidate.requires_extras = extras

                matches.append(candidate)

        self._log(
            "{} packages found for {} {}".format(len(matches), name, str(constraint)),
            level="debug",
        )

        return matches

    def package(
        self,
        name,  # type: str
        version,  # type: str
        extras=None,  # type: (Union[list, None])
    ):  # type: (...) -> Union[Package, None]
        """Build the package for ``name``/``version`` from its release info.

        Requirements that cannot be parsed are logged and skipped; an invalid
        marker is stripped and the requirement parsed again. Dependencies of
        the requested ``extras`` are activated and added to the requirements.
        """
        if extras is None:
            extras = []

        release_info = self.get_release_info(name, version)
        package = Package(name, version, version)

        for req in release_info["requires_dist"] or []:
            try:
                dependency = dependency_from_pep_508(req)
            except InvalidMarker:
                # Invalid marker
                # We strip the markers hoping for the best
                req = req.split(";")[0]

                dependency = dependency_from_pep_508(req)
            except ValueError:
                # Likely unable to parse constraint so we skip it
                self._log(
                    "Invalid constraint ({}) found in {}-{} dependencies, "
                    "skipping".format(req, package.name, package.version),
                    level="debug",
                )
                continue

            if dependency.in_extras:
                for extra in dependency.in_extras:
                    if extra not in package.extras:
                        package.extras[extra] = []

                    package.extras[extra].append(dependency)

            if not dependency.is_optional():
                package.requires.append(dependency)

        # Adding description
        package.description = release_info.get("summary", "")

        if release_info["requires_python"]:
            package.python_versions = release_info["requires_python"]

        if release_info["platform"]:
            package.platform = release_info["platform"]

        # Adding hashes information
        package.files = release_info["files"]

        # Activate extra dependencies
        for extra in extras:
            if extra not in package.extras:
                continue

            for dep in package.extras[extra]:
                dep.activate()

            package.requires += package.extras[extra]

        return package

    def search(self, query):
        """Query the HTML search page and return the parsable results.

        Snippets without a name or version are ignored; snippets whose
        version cannot be parsed are logged and skipped.
        """
        found = []

        response = session().get(self._base_url + "search", params={"q": query})
        content = parse(response.content, namespaceHTMLElements=False)

        for snippet in content.findall(".//*[@class='package-snippet']"):
            name = snippet.find("h3/*[@class='package-snippet__name']").text
            version = snippet.find("h3/*[@class='package-snippet__version']").text

            if not name or not version:
                continue

            description = (
                snippet.find("p[@class='package-snippet__description']").text or ""
            )

            try:
                hit = Package(name, version, description)
                hit.description = to_str(description.strip())
                found.append(hit)
            except ParseVersionError:
                self._log(
                    'Unable to parse version "{}" for the {} package, skipping'
                    .format(version, name),
                    level="debug",
                )

        return found

    def get_package_info(self, name):  # type: (str) -> dict
        """
        Return the package information for ``name``.

        Served from the in-memory ``packages`` store when available, fetched
        from the remote server otherwise.
        """
        if self._disable_cache:
            return self._get_package_info(name)

        store = self._cache.store("packages")
        return store.remember_forever(name, lambda: self._get_package_info(name))

    def _get_package_info(self, name):  # type: (str) -> dict
        """Fetch the package JSON document; raise PackageNotFound on a 404."""
        data = self._get("pypi/{}/json".format(name))
        if data is not None:
            return data

        raise PackageNotFound("Package [{}] not found.".format(name))

    def get_release_info(self, name, version):  # type: (str, str) -> dict
        """
        Return the release information for ``name`` at ``version``.

        Served from the cache when available. A cached entry whose
        ``_cache_version`` differs from ``CACHE_VERSION`` is fetched again
        and overwritten.
        """
        if self._disable_cache:
            return self._get_release_info(name, version)

        cache_key = "{}:{}".format(name, version)
        cached = self._cache.remember_forever(
            cache_key, lambda: self._get_release_info(name, version)
        )

        cache_version = cached.get("_cache_version", "0.0.0")
        if parse_constraint(cache_version) != self.CACHE_VERSION:
            # The cache must be updated
            self._log(
                "The cache for {} {} is outdated. Refreshing.".format(
                    name, version),
                level="debug",
            )
            cached = self._get_release_info(name, version)

            self._cache.forever(cache_key, cached)

        return cached

    def _get_release_info(self, name, version):  # type: (str, str) -> dict
        """Fetch a release and return it as a dictionary.

        When the release declares no ``requires_dist`` and fallback is
        enabled, the sdist/wheel archives are inspected to recover the
        dependencies (and ``requires_python`` if it is missing).
        """
        self._log("Getting info for {} ({}) from PyPI".format(name, version), "debug")

        json_data = self._get("pypi/{}/{}/json".format(name, version))
        if json_data is None:
            raise PackageNotFound("Package [{}] not found.".format(name))

        info = json_data["info"]
        data = {
            "name": info["name"],
            "version": info["version"],
            "summary": info["summary"],
            "platform": info["platform"],
            "requires_dist": info["requires_dist"],
            "requires_python": info["requires_python"],
            "files": [],
            "_cache_version": str(self.CACHE_VERSION),
        }

        try:
            release_files = json_data["releases"][version]
        except KeyError:
            release_files = []

        for entry in release_files:
            data["files"].append({
                "file": entry["filename"],
                "hash": "sha256:" + entry["digests"]["sha256"],
            })

        if not (self._fallback and data["requires_dist"] is None):
            return data

        self._log("No dependencies found, downloading archives", level="debug")
        # No dependencies set (along with other information). This might be
        # due to actually no dependencies or to badly set metadata when
        # uploading, so the archives are introspected to make sure.
        urls = defaultdict(list)
        url_requires_python_dict = {}
        for entry in json_data["urls"]:
            # Only get sdist and wheels if they exist
            dist_type = entry["packagetype"]
            if dist_type not in ["sdist", "bdist_wheel"]:
                continue

            urls[dist_type].append(entry["url"])
            url_requires_python_dict[entry["url"]] = entry["requires_python"]

        if not urls:
            return data

        inspected = self._get_info_from_urls(urls, url_requires_python_dict)
        data["requires_dist"] = inspected["requires_dist"]

        if not data["requires_python"]:
            data["requires_python"] = inspected["requires_python"]

        return data

    def _get(self, endpoint):  # type: (str) -> Union[dict, None]
        """GET ``endpoint`` under the base URL and decode the JSON body.

        Returns None on a 404. On a redirect loop the cached entry for the
        URL is deleted and the request is issued once more.
        """
        target = self._base_url + endpoint
        try:
            json_response = self._session.get(target)
        except TooManyRedirects:
            # Cache control redirect loop.
            # We try to remove the cache and try again
            self._cache_control_cache.delete(target)
            json_response = self._session.get(target)

        if json_response.status_code == 404:
            return None

        return json_response.json()

    def _get_info_from_urls(
        self, urls, url_requires_python_dict
    ):  # type: (Dict[str, List[str]], Dict[str, Union[str, None]]) -> Dict[str, Union[str, List, None]]
        """Pick the most informative archive in ``urls`` and inspect it.

        Order of preference: a ``py2.py3`` universal wheel, the merged
        py2 + py3 universal wheels, a py3 then a py2 universal wheel, the
        first platform-specific wheel (only when no sdist exists), and
        finally the first sdist. When the inspected archive reports no
        ``requires_python``, the value recorded for its URL in
        ``url_requires_python_dict`` is used.
        """

        # If requires_python exists in anchor of link, apply it to the
        # release info (PEP503)
        def _inspect(url, inspect_archive):
            inspected = inspect_archive(url)
            if url_requires_python_dict[url] and not inspected["requires_python"]:
                inspected["requires_python"] = url_requires_python_dict[url]

            return inspected

        # Checking wheels first as they are more likely to hold
        # the necessary information
        if "bdist_wheel" in urls:
            # Check for a universal wheel
            universal_wheel = None
            universal_python2_wheel = None
            universal_python3_wheel = None
            platform_specific_wheels = []

            for wheel in urls["bdist_wheel"]:
                match = wheel_file_re.match(Link(wheel).filename)
                if not match:
                    continue

                pyver = match.group("pyver")
                abi = match.group("abi")
                plat = match.group("plat")

                if abi != "none" or plat != "any":
                    platform_specific_wheels.append(wheel)
                    continue

                # Universal wheel
                if pyver == "py2.py3":
                    # Any Python
                    universal_wheel = wheel
                elif pyver == "py2":
                    universal_python2_wheel = wheel
                else:
                    universal_python3_wheel = wheel

            if universal_wheel is not None:
                return _inspect(universal_wheel, self._get_info_from_wheel)

            info = {}
            if universal_python2_wheel and universal_python3_wheel:
                info = _inspect(universal_python2_wheel, self._get_info_from_wheel)
                py3_info = _inspect(universal_python3_wheel, self._get_info_from_wheel)

                if py3_info["requires_dist"]:
                    if not info["requires_dist"]:
                        info["requires_dist"] = py3_info["requires_dist"]

                        return info

                    py2_requires_dist = {
                        dependency_from_pep_508(r).to_pep_508()
                        for r in info["requires_dist"]
                    }
                    py3_requires_dist = {
                        dependency_from_pep_508(r).to_pep_508()
                        for r in py3_info["requires_dist"]
                    }

                    # Normalizing requires_dist: shared requirements are kept
                    # as they are, version-specific ones get a Python marker.
                    requires_dist = list(py2_requires_dist & py3_requires_dist)
                    version_specific = (
                        (py2_requires_dist - py3_requires_dist, "python_version == '2.7'"),
                        (py3_requires_dist - py2_requires_dist, "python_version >= '3'"),
                    )
                    for only_requires_dist, marker in version_specific:
                        for requirement in only_requires_dist:
                            dep = dependency_from_pep_508(requirement)
                            dep.marker = dep.marker.intersect(parse_marker(marker))
                            requires_dist.append(dep.to_pep_508())

                    info["requires_dist"] = sorted(set(requires_dist))

            if info:
                return info

            # Prefer non platform specific wheels
            if universal_python3_wheel:
                return _inspect(universal_python3_wheel, self._get_info_from_wheel)

            if universal_python2_wheel:
                return _inspect(universal_python2_wheel, self._get_info_from_wheel)

            if platform_specific_wheels and "sdist" not in urls:
                # Pick the first wheel available and hope for the best
                return _inspect(platform_specific_wheels[0], self._get_info_from_wheel)

        return _inspect(urls["sdist"][0], self._get_info_from_sdist)

    def _get_info_from_wheel(
        self, url
    ):  # type: (str) -> Dict[str, Union[str, List, None]]
        """Download the wheel at ``url`` to a temporary directory and inspect it."""
        wheel_name = urlparse.urlparse(url).path.rsplit("/")[-1]
        self._log("Downloading wheel: {}".format(wheel_name), level="debug")

        filename = os.path.basename(wheel_name)

        with temporary_directory() as temp_dir:
            filepath = Path(temp_dir) / filename
            self._download(url, str(filepath))

            # Inspect before the temporary directory is left.
            return self._inspector.inspect_wheel(filepath)

    def _get_info_from_sdist(
        self, url
    ):  # type: (str) -> Dict[str, Union[str, List, None]]
        """Download the sdist at ``url`` to a temporary directory and inspect it."""
        url_path = urlparse.urlparse(url).path
        self._log(
            "Downloading sdist: {}".format(url_path.rsplit("/")[-1]),
            level="debug",
        )

        filename = os.path.basename(url_path)

        with temporary_directory() as temp_dir:
            filepath = Path(temp_dir) / filename
            self._download(url, str(filepath))

            # Inspect before the temporary directory is left.
            return self._inspector.inspect_sdist(filepath)

    def _download(self, url, dest):  # type: (str, str) -> None
        """Stream ``url`` to the file ``dest``, raising on an HTTP error status."""
        response = get(url, stream=True)
        response.raise_for_status()

        with open(dest, "wb") as archive:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    archive.write(chunk)

    def _log(self, msg, level="info"):
        """Log ``msg`` at ``level``, prefixed with the repository name."""
        emit = getattr(logger, level)
        emit("<comment>{}:</comment> {}".format(self._name, msg))