class VCSRepo(SpecObject):
    """Base VCS repo class"""

    # Local filesystem path of the repository (required).
    path = attrib(default=attr.NOTHING)
    # Files of interest tracked within this repository.
    files = attrib(default=attr.Factory(list))

    @property
    def diff_identity_string(self):
        # Extend the parent's identity string with this repo's path.
        parent_str = super(VCSRepo, self).diff_identity_string
        return parent_str + ' (%s)' % self.path

    @property
    def diff_subidentity_string(self):
        # Extend the parent's sub-identity string with this repo's path.
        parent_str = super(VCSRepo, self).diff_subidentity_string
        return parent_str + ' (%s)' % self.path

    @property
    def commit(self):
        # Subclasses declare which of their fields identifies the commit
        # via _commit_attribute (e.g. 'hexsha' for git).
        try:
            return getattr(self, self._commit_attribute)
        except AttributeError:
            # raised if _commit_attribute is not defined, but this means
            # (to the caller) that commit is not defined, so we give the
            # caller a more appropriate error message
            msg = "%s instance has no attribute 'commit'" % self.__class__
            raise AttributeError(msg)
class Shell(Resource):
    """Resource representing a plain local shell environment."""

    # Container properties
    name = attrib(default=attr.NOTHING)
    id = attrib()
    type = attrib(default='shell')

    # TODO: standardize status outputs
    # "available" is chosen in favor of "running", which is to be used
    # e.g. if we know that this shell is currently in use.
    status = attrib(default='available')

    def create(self):
        """
        Create a running environment.

        Yields
        ------
        dict : config parameters to capture in the inventory file
        """
        # Generic logic to reside in Resource???
        if self.id is None:
            self.id = Resource._generate_id()
        yield {'id': self.id, 'status': 'available'}

    def connect(self):
        """
        Connect to an existing environment.
        """
        return self

    def delete(self):
        """
        Remove this environment from the backend.
        """
        return

    def start(self):
        """
        Start this environment in the backend.
        """
        # Not a shell feature
        raise NotImplementedError

    def stop(self):
        """
        Stop this environment in the backend.
        """
        # Not a shell feature
        raise NotImplementedError

    def get_session(self, pty=False, shared=None):
        """
        Log into a container and get the command line
        """
        # Neither pty allocation nor shared sessions are supported for a
        # plain local shell.
        if pty or shared:
            raise NotImplementedError
        return ShellSession()
class SVNRepo(VCSRepo):
    """Subversion repository information."""
    # Checked-out revision
    revision = attrib()
    # URL the working copy was checked out from
    url = attrib()
    # Repository root URL
    root_url = attrib()
    # URL relative to the repository root
    relative_url = attrib()
    # Repository UUID
    uuid = attrib()

    # Two SVNRepo specs are comparable for diffing when uuids match...
    _diff_cmp_fields = ('uuid', )
    # ...and differ by revision
    _diff_fields = ('revision', )
    # VCSRepo.commit resolves to this attribute
    _commit_attribute = 'revision'
class APTSource(SpecObject):
    """APT origin information
    """
    # Identifier for this source (required)
    name = attrib(default=attr.NOTHING)
    # Release metadata fields; presumably mirroring what APT reports for
    # the origin (TODO confirm against the tracer that populates these)
    component = attrib()
    archive = attrib()
    architecture = attrib()
    codename = attrib()
    origin = attrib()
    label = attrib()
    site = attrib()
    archive_uri = attrib()
    date = attrib()
class VenvDistribution(Distribution):
    """Class to provide virtualenv-based "distributions".
    """
    path = attrib()
    venv_version = attrib()
    environments = TypedList(VenvEnvironment)

    def initiate(self, _):
        # Nothing to prepare up front; environments are created on demand
        # in install_packages.
        return

    @borrowdoc(Distribution)
    def install_packages(self, session=None):
        session = session or get_local_session()
        for env in self.environments:
            # TODO: Deal with system and editable packages.
            to_install = []
            for pkg in env.packages:
                if pkg.local and not pkg.editable:
                    to_install.append("{p.name}=={p.version}".format(p=pkg))
            if not to_install:
                lgr.info("No local, non-editable packages found")
                continue

            # TODO: Right now we just use the python to invoke "virtualenv
            # --python=..." when the directory doesn't exist, but we should
            # eventually use the yet-to-exist "satisfies" functionality to
            # check whether an existing virtual environment has the right
            # python (and maybe other things).
            ver = parse_semantic_version(env.python_version)
            pyver = "{v.major}.{v.minor}".format(v=ver)

            if not session.exists(env.path):
                # The location and version of virtualenv are recorded at the
                # time of tracing, but should we use these values? For now,
                # use a plain "virtualenv" below on the basis that we just use
                # "apt-get" and "git" elsewhere.
                cmd = ["virtualenv",
                       "--python=python{}".format(pyver),
                       env.path]
                session.execute_command(cmd)

            pip = env.path + "/bin/pip"
            list(execute_command_batch(session, [pip, "install"], to_install))
class VenvPackage(Package):
    """Package installed inside a virtualenv environment."""
    # Package name and version (both required)
    name = attrib(default=attr.NOTHING)
    version = attrib(default=attr.NOTHING)
    # presumably True when the package lives in the venv itself rather
    # than being picked up system-wide -- confirm against the tracer
    local = attrib(default=False)
    location = attrib()
    # True for editable ("pip install -e") installs
    editable = attrib(default=False)
    files = attrib(default=attr.Factory(list))
class EnvironmentSpec(SpecObject):
    """Specification of a computation environment."""

    # ??? to define specifics of the system, possibly a docker base
    base = attrib()
    distributions = TypedList(Distribution)  # list of distributions
    files = attrib(default=Factory(list))  # list of other files
    # runs?  whenever we get to provisioning executions
    # those would also be useful for tracing for presence of distributions
    # e.g. depending on what is in the PATH

    def get_distribution(self, dtype):
        """get_distribution(dtype) -> distribution

        Returns the distribution of the specified type in the given
        environment.  Returns None if there are no matching
        distributions.  Raises ValueError if there is more than one
        matching distribution.
        """
        dist = None
        for d in self.distributions:
            if isinstance(d, dtype):
                # Compare against None explicitly: the previous truthiness
                # check ("if dist:") would silently overwrite an already
                # found distribution instance that happens to evaluate as
                # falsy, missing the "multiple found" error.
                if dist is not None:
                    raise ValueError('multiple %s found' % str(dtype))
                dist = d
        return dist
class DebianReleaseSpec(object): """Encapsulates knowledge about used Debian release origin: Debian label: Debian suite: stable version: 8.5 codename: jessie date: Sat, 04 Jun 2016 13:24:54 UTC components: main contrib non-free architectures: amd64 """ # Those could be different in overlays or other derived distributions origin = attrib(default=attr.NOTHING) # Debian label = attrib(default=attr.NOTHING) # Debian codename = attrib(default=attr.NOTHING) suite = attrib(default=attr.NOTHING) version = attrib(default=attr.NOTHING) date = attrib(default=attr.NOTHING) components = attrib(default=attr.NOTHING) architectures = attrib(default=attr.NOTHING)
class GitRepo(VCSRepo):
    """Git repository information."""
    # SHA of the root commit -- serves as a stable repository identity
    root_hexsha = attrib()
    # Currently checked-out branch
    branch = attrib()
    # SHA of the current commit
    hexsha = attrib()
    # Output of "git describe" -- TODO confirm against the tracer
    describe = attrib()
    tracked_remote = attrib()
    # Mapping of remote name -> remote info
    remotes = attrib(default=attr.Factory(dict))

    # Two GitRepo specs are comparable for diffing when root hexshas match...
    _diff_cmp_fields = ('root_hexsha', )
    # ...and differ by commit and branch
    _diff_fields = ('hexsha', 'branch')
    # VCSRepo.commit resolves to this attribute
    _commit_attribute = 'hexsha'

    @property
    def diff_subidentity_string(self):
        # Override the generic path-only string with branch/commit details.
        return 'branch %s, commit %s (%s)' % (
            self.branch, self.hexsha, self.path)
def TypedList(type_):
    """A helper to generate an attribute which would be with list factory
    but also defining a type in its metadata
    """
    metadata = {'type': type_}
    return attrib(default=Factory(list), metadata=metadata)
class Distribution(SpecObject, metaclass=abc.ABCMeta):
    """Base class for distributions"""

    # Actually might want/need to go away since somewhat duplicates the class
    # name and looks awkward
    name = attrib(default=attr.NOTHING)

    @staticmethod
    def factory(distribution_type, provenance=None):
        """
        Factory method for creating the appropriate Orchestrator sub-class
        based on format type.

        Parameters
        ----------
        distribution_type : string
            Type of distribution subclass to create. Current options are:
            'conda', 'debian', 'neurodebian', 'pypi', 'redhat'
        provenance : dict
            Keyword args to be passed to initialize class instance

        Returns
        -------
        distribution : object
            Distribution class or its instance (when provenance is not None)
        """
        # Handle distributions that don't follow the assumed naming structure.
        special_dists = {"svn": "SVNDistribution"}
        special_modules = {"git": "vcs", "svn": "vcs"}

        key = distribution_type.lower()
        if key in special_dists:
            class_name = special_dists[key]
        else:
            class_name = key.capitalize() + 'Distribution'
        module_name = 'reproman.distributions.' + special_modules.get(key, key)
        class_ = getattr(import_module(module_name), class_name)
        if provenance is None:
            return class_
        return class_(**provenance)

    @abc.abstractmethod
    def initiate(self, session):
        """
        Perform any initialization commands needed in the environment.

        Parameters
        ----------
        session : object
            The Session to work in
        """
        return

    @abc.abstractmethod
    def install_packages(self, session=None):
        """
        Install the packages associated to this distribution by the
        provenance into the environment.

        Parameters
        ----------
        session : object
            Session to work in
        """
        return
class DEBPackage(Package):
    """Debian package information"""
    # Package name (required)
    name = attrib(default=attr.NOTHING)
    # Optional
    upstream_name = attrib()
    version = attrib()
    architecture = attrib()
    # hash=False excludes these volatile/derived fields from the
    # attrs-generated hash
    source_name = attrib(hash=False)
    source_version = attrib(hash=False)
    size = attrib(hash=False)
    md5 = attrib(hash=False)
    sha1 = attrib(hash=False)
    sha256 = attrib(hash=False)
    versions = attrib(hash=False)  # Hash ver_str -> [Array of source names]
    install_date = attrib(hash=False)
    files = attrib(default=attr.Factory(list), hash=False)

    # Packages are comparable for diffing when name and architecture match...
    _diff_cmp_fields = ('name', 'architecture')
    # ...and differ by version
    _diff_fields = ('version',)
    _comparison_fields = ('name', 'architecture', 'version')
class VenvEnvironment(SpecObject):
    """A single virtualenv environment within a VenvDistribution."""
    # Filesystem path of the virtualenv
    path = attrib()
    # Python version used by the environment (parsed as a semantic version
    # by VenvDistribution.install_packages)
    python_version = attrib()
    # Packages installed into this environment
    packages = TypedList(VenvPackage)
class DebianDistribution(Distribution):
    """
    Class to provide Debian-based shell commands.
    """

    apt_sources = TypedList(APTSource)
    packages = TypedList(DEBPackage)
    version = attrib()  # version as depicted by /etc/debian_version

    _collection_attribute = 'packages'

    def initiate(self, session):
        """
        Perform any initialization commands needed in the environment.

        Parameters
        ----------
        session : Session object
            The session to work in.
        """
        lgr.debug("Adding Debian update to environment command list.")
        self._init_apt_sources(session)
        # TODO: run apt-get update only if new apt-sources returned in above
        #  call OR apt-cache policy output is empty
        # TODO: make Check-Valid-Until not default
        lgr.info("Updating list of available via APT packages")
        session.execute_command(
            ['apt-get', '-o', 'Acquire::Check-Valid-Until=false', 'update'])
        #session.execute_command(['apt-get', 'install', '-y', 'python-pip'])
        # session.set_env(DEBIAN_FRONTEND='noninteractive', this_session_only=True)

    def _init_apt_sources(self, session,
                          apt_source_file='/etc/apt/sources.list.d/reproman.sources.list'):
        """
        Update /etc/apt/sources if necessary based on source date.

        Parameters
        ----------
        session : Session object
        apt_source_file: string
            Path of the sources file managed by ReproMan.

        Returns
        -------
        list
            Relevant apt sources.  Empty if no apt sources were specified
        """
        # Known snapshot services keyed by APT origin; only sources from
        # these origins are handled here.
        repo_info = {
            'Debian': {
                'url': 'snapshot.debian.org',
                'keyserver': None,
                'key': None
            },
            'NeuroDebian': {
                'url': 'snapshot-neuro.debian.net:5002',
                'keyserver': 'hkp://pool.sks-keyservers.net:80',
                'key': '0xA5D32F012649A5A9'
            }
        }

        sources = [s for s in self.apt_sources if s.origin in repo_info]

        # Create a new apt sources file if needed.
        if sources and not session.exists(apt_source_file):
            session.execute_command(
                "sh -c 'echo \"# ReproMan repo sources\" > {}'"
                .format(apt_source_file))

        for source in sources:
            # Write snapshot repo to apt sources file.
            # Drop any "+HH:MM" UTC-offset suffix before parsing the date.
            date = datetime.strptime(source.date.split('+')[0],
                                     "%Y-%m-%d %X")
            template = 'deb http://{}/archive/{}/{}/ {} main contrib non-free'
            source_line = template.format(
                repo_info[source.origin]['url'],
                source.origin.lower(),
                date.strftime("%Y%m%dT%H%M%SZ"),
                source.codename
            )
            self._write_apt_sources(session, apt_source_file, source_line)

            # Write "next" snapshot repo to apt sources file.
            # The snapshot service's listing page links the following
            # snapshot; scrape it so the next one is also available.
            template_list_page = 'http://{}/archive/{}/{}/dists/{}/'
            url = template_list_page.format(
                repo_info[source.origin]['url'],
                source.origin.lower(),
                date.strftime("%Y%m%dT%H%M%SZ"),
                source.codename
            )
            r = requests.get(url)
            m = re.search(
                '<a href="/archive/\w*debian/(\w+)/dists/\w+/">next change</a>',
                r.text)
            if m:
                source_line = template.format(
                    repo_info[source.origin]['url'],
                    source.origin.lower(),
                    m.group(1),
                    source.codename
                )
                self._write_apt_sources(session, apt_source_file, source_line)

            # Add keyserver if needed.
            if repo_info[source.origin]['keyserver']:
                session.execute_command(
                    ['apt-key', 'adv',
                     '--recv-keys',
                     '--keyserver',
                     repo_info[source.origin]['keyserver'],
                     repo_info[source.origin]['key']])

        return sources

    def _write_apt_sources(self, session, apt_source_file, source_line):
        """
        Write a line to the /etc/apt/sources.d/ file

        Appends `source_line` only if it is not already present.

        Parameters
        ----------
        session : Session object
        apt_source_file: string
        source_line: string
        """
        command = "grep -q '{}' {}"
        # NOTE(review): assumes execute_command returns a pair whose second
        # element is truthy when grep exits non-zero (line absent) -- confirm
        # against the Session.execute_command contract
        out, line_not_found = session.execute_command(command.format(
            source_line, apt_source_file))
        if line_not_found:
            lgr.debug("Adding line '{}' to {}".format(source_line,
                                                      apt_source_file))
            session.execute_command("sh -c 'echo {} >> {}'".format(
                source_line, apt_source_file))

    def install_packages(self, session, use_version=True):
        """
        Install the packages associated to this distribution by the
        provenance into the environment.

        Parameters
        ----------
        session : object
        use_version : bool, optional
            Use version information if provided.
            TODO: support outside or deprecate
        """
        package_specs = []

        for package in self.packages:
            # Pin "name=version" when requested and a version is known.
            package_spec = package.name
            if use_version and package.version:
                package_spec += '=%s' % package.version
            package_specs.append(package_spec)

        # Doing in one shot to fail early if any of the versioned specs
        # couldn't be satisfied
        lgr.info("Installing %s via APT",
                 single_or_plural("package", "packages",
                                  len(package_specs), include_count=True))
        lgr.debug("Installing %s", ', '.join(package_specs))
        session.execute_command(
            # TODO: Pull env out of provenance for this command.
            ['apt-get', 'install', '-y'] + package_specs,
            env={'DEBIAN_FRONTEND': 'noninteractive'}
        )
        # TODO: react on message asking to run dpkg --configure -a

    def normalize(self):
        # TODO:
        #  - among apt-source we could merge some together if we allow for
        #    e.g. component (main, contrib, non-free) to be a list!  but that
        #    would make us require supporting flexible typing -- string or a list
        pass

    def __sub__(self, other):
        # the semantics of distribution subtraction are, for d1 - d2:
        #   what is specified in d1 that is not specified in d2
        #   or how does d2 fall short of d1
        #   or what is in d1 that isn't satisfied by d2
        return [p for p in self.packages
                if not p.compare(other, mode='satisfied_by')]
class CondaChannel(SpecObject):
    """A conda channel entry within a conda environment spec."""
    # Channel name (required)
    name = attrib(default=attr.NOTHING)
    # Channel URL (optional)
    url = attrib()
class CondaEnvironment(SpecObject):
    """A single conda environment within a CondaDistribution."""
    # Environment name (required)
    name = attrib(default=attr.NOTHING)
    # Filesystem path (prefix) of the environment
    path = attrib()
    # Packages installed in the environment
    packages = TypedList(CondaPackage)
    # Channels configured for the environment
    channels = TypedList(CondaChannel)
class CondaDistribution(Distribution):
    """
    Class to provide Conda package management.
    """

    # Root (install) path of the conda distribution
    path = attrib()
    conda_version = attrib()
    python_version = attrib()
    platform = attrib()
    environments = TypedList(CondaEnvironment)

    _cmp_field = ('path', )

    def initiate(self, environment):
        """
        Perform any initialization commands needed in the environment.

        Parameters
        ----------
        environment : object
            The Environment sub-class object.
        """
        # TODO Move conda installation here (environment is actually session)
        return

    def install_packages(self, session=None):
        """
        Install the packages associated to this distribution by the
        provenance into the environment.

        Parameters
        ----------
        session : object
            Environment sub-class instance.

        Raises
        ------
        ValueError
            Unexpected conda platform or python version
        CommandError
            If unexpected error in install commands occurs
        """
        if not self.path:  # Permit empty conda config entry
            return

        if not session:
            session = get_local_session()

        # Use the session to make a temporary directory for our install files
        tmp_dir = session.mktmpdir()
        try:
            # Install Conda
            # See if Conda root path exists and if not, install Conda
            if not session.isdir(self.path):
                # TODO: Determine if we can detect miniconda vs anaconad
                miniconda_url = get_miniconda_url(self.platform,
                                                  self.python_version)
                session.execute_command(
                    "curl %s -o %s/miniconda.sh" % (miniconda_url, tmp_dir))
                # NOTE: miniconda.sh makes parent directories automatically
                # NOTE(review): the first "-b" is an option to bash itself,
                # not to the installer -- looks accidental; confirm before
                # changing since the command reportedly works as-is
                session.execute_command(
                    "bash -b %s/miniconda.sh -b -p %s" % (tmp_dir, self.path))

            ## Update root version of conda
            session.execute_command(
                "%s/bin/conda install -y conda=%s python=%s" % (
                    self.path, self.conda_version,
                    self.get_simple_python_version(self.python_version)))

            # Loop through non-root packages, creating the conda-env config
            for env in self.environments:
                export_contents = self.create_conda_export(env)
                with make_tempfile(export_contents) as local_config:
                    remote_config = os.path.join(tmp_dir, env.name)
                    session.put(local_config, remote_config)
                    if not session.isdir(env.path):
                        try:
                            session.execute_command(
                                "%s/bin/conda-env create -p %s -f %s " % (
                                    self.path, env.path, remote_config))
                        except CommandError:
                            # Some conda versions seg fault so try to update
                            session.execute_command(
                                "%s/bin/conda-env update -p %s -f %s " % (
                                    self.path, env.path, remote_config))
                    else:
                        session.execute_command(
                            "%s/bin/conda-env update -p %s -f %s " % (
                                self.path, env.path, remote_config))
        finally:
            if tmp_dir:
                # Remove the tmp dir
                session.execute_command(["rm", "-R", tmp_dir])

        return

    @property
    def packages(self):
        # Flattened view over all environments' packages.
        return [p for env in self.environments for p in env.packages]

    @staticmethod
    def get_simple_python_version(python_version):
        # Get the simple python version from the conda info string
        # Specifically, pull "major.minor.micro" from the full string
        # "major.minor.micro.releaselevel.serial"
        return ".".join(python_version.split(".", 3)[:3])

    @staticmethod
    def format_conda_package(name, version=None, build=None, **_):
        # Note: Conda does not accept a build without a version
        return ("%s=%s=%s" % (name, version, build) if version and build
                else ("%s=%s" % (name, version) if version
                      else "%s" % name))

    @staticmethod
    def format_pip_package(name, version=None, **_):
        # pip pins with "==", unlike conda's "="
        return ("%s==%s" % (name, version) if version
                else "%s" % name)

    @staticmethod
    def create_conda_export(env):
        # Collect the environment into a dictionary in the same manner as
        # https://github.com/conda/conda/blob/master/conda_env/env.py
        d = {}

        # TODO: The environment name should be discovered on retrace
        name = os.path.basename(os.path.normpath(env.path))
        d["name"] = name

        # Collect channels
        d["channels"] = [c.name for c in env.channels]

        # Collect packages (dependencies) with no installer
        d["dependencies"] = [
            CondaDistribution.format_conda_package(p.name, p.version, p.build)
            for p in env.packages if p.installer is None]
        #     p.get("name"), p.get("version"), p.get("build"))

        # Collect pip-installed dependencies
        pip_deps = [
            CondaDistribution.format_pip_package(p.name, p.version)
            for p in env.packages if p.installer == "pip"]
        if (pip_deps):
            d["dependencies"].append({"pip": pip_deps})

        # Add the prefix
        d["prefix"] = env.path

        # Now dump the export as a yaml file
        return yaml.safe_dump(d, default_flow_style=False)
class CondaPackage(Package):
    """A package within a conda environment."""
    # Package name (required)
    name = attrib(default=attr.NOTHING)
    # None for conda-managed packages; "pip" for pip-installed ones
    # (see CondaDistribution.create_conda_export)
    installer = attrib()
    version = attrib()
    build = attrib()
    channel_name = attrib()
    size = attrib()
    md5 = attrib()
    url = attrib()
    location = attrib()
    editable = attrib(default=False)
    files = attrib(default=attr.Factory(list))

    # Packages are comparable for diffing when name and build match...
    _diff_cmp_fields = ('name', 'build')
    # ...and differ by version
    _diff_fields = ('version', )