def init_db(self, db_path): '''initialize the database, with the default database path or custom of the format sqlite:////scif/data/expfactory.db The custom path can be set with the environment variable SREGISTRY_DATABASE when a user creates the client, we must initialize this db the database should use the .singularity cache folder to cache layers and images, and .singularity/sregistry.db as a database ''' # Database Setup, use default if uri not provided self.database = 'sqlite:///%s' % db_path self.storage = SREGISTRY_STORAGE bot.debug("Database located at %s" % self.database) self.engine = create_engine(self.database, convert_unicode=True) self.session = scoped_session(sessionmaker(autocommit=False, autoflush=False, bind=self.engine)) Base.query = self.session.query_property() # import all modules here that might define models so that # they will be registered properly on the metadata. Otherwise # you will have to import them first before calling init_db() Base.metadata.create_all(bind=self.engine) self.Base = Base
def get(url, headers=None, token=None, data=None, return_json=True):
    """Issue a GET request against `url` through the shared `call` helper.

    Note: `token` is accepted for signature compatibility but is not
    referenced in this body.
    """
    bot.debug("GET %s" % url)
    request_options = {
        "headers": headers,
        "func": requests.get,
        "data": data,
        "return_json": return_json,
    }
    return call(url, **request_options)
def get_credential_cache():
    """Return the path for this client's credential cache, or None.

    If the user has enabled a credential cache, the cache root folder is
    created when missing, and the per-client path under it is returned.
    The returned path is not assumed to be a file or a folder — that is
    up to the developer of the client.
    """
    from sregistry.defaults import CREDENTIAL_CACHE, SREGISTRY_CLIENT

    cache_path = None

    # Check 1: the cache can be disabled per client via environment variable
    if CREDENTIAL_CACHE is not None:
        disable_var = "SREGISTRY_DISABLE_CREDENTIAL_%s" % SREGISTRY_CLIENT.upper()
        if os.environ.get(disable_var) is not None:
            bot.debug("[%s] cache disabled" % SREGISTRY_CLIENT)
            CREDENTIAL_CACHE = None

    # Check 2: cache still enabled — ensure the root exists, derive client path
    if CREDENTIAL_CACHE is not None:
        if not os.path.exists(CREDENTIAL_CACHE):
            mkdir_p(CREDENTIAL_CACHE)
        cache_path = "%s/%s" % (CREDENTIAL_CACHE, SREGISTRY_CLIENT)

    if cache_path is not None:
        bot.debug("credentials cache")

    return cache_path
def push(self, path, name, tag=None):
    """Upload an image file to Google Cloud Storage.

    Parameters
    ==========
    path: absolute (or resolvable) path to the image file on disk.
    name: the complete uri the user has requested to push.
    tag: an optional image tag, provided to mirror Docker.
    """
    path = os.path.abspath(path)
    bot.debug("PUSH %s" % path)
    if not os.path.exists(path):
        bot.exit("%s does not exist." % path)

    # Parse collection/container fields from the requested uri
    parsed = parse_image_name(remove_uri(name), tag=tag)

    # Without an explicit version, derive one from the file's sha256 digest
    if parsed["version"] is None:
        parsed = parse_image_name(
            remove_uri(name), tag=tag, version=get_file_hash(path, "sha256")
        )

    # Attach the parsed names to the metadata and upload
    manifest = self._upload(
        source=path,
        destination=parsed["storage"],
        metadata=self.get_metadata(path, names=parsed),
    )
    print(manifest["mediaLink"])
def post(url, headers=None, data=None, return_json=True):
    """Issue a POST request against `url` through the shared `call` helper."""
    bot.debug("POST %s" % url)
    request_options = {
        "headers": headers,
        "func": requests.post,
        "data": data,
        "return_json": return_json,
    }
    return call(url, **request_options)
def push(self, path, name, tag=None):
    '''push an image to Singularity Registry.

    Parameters
    ==========
    path: should correspond to an absolute image path (or derive it)
    name: should be the complete uri that the user has requested to push.
    tag: should correspond with an image tag. This is provided to mirror Docker
    '''
    path = os.path.abspath(path)
    bot.debug("PUSH %s" % path)

    # Consistent with sibling clients: bot.exit logs the error and exits
    # non-zero (replaces bot.error + sys.exit(1))
    if not os.path.exists(path):
        bot.exit('%s does not exist.' % path)

    # This returns a data structure with collection, container, based on uri
    names = parse_image_name(remove_uri(name), tag=tag)

    # use Singularity client, if exists, to inspect to extract metadata
    metadata = self.get_metadata(path, names=names)

    # If you want a spinner
    bot.spinner.start()

    # do your push request here. Generally you want to except a
    # KeyboardInterrupt and give the user a status from the response
    bot.spinner.stop()
def get_template(name):
    """Return a default template for some function in sregistry.

    If there is no template with the given (case-insensitive) name,
    a warning is issued and None is returned.

    Parameters
    ==========
    name: the name of the template to retrieve
    """
    name = name.lower()

    # Registry of known templates; mode 493 is octal 0o755
    templates = {
        "tarinfo": {
            "gid": 0,
            "uid": 0,
            "uname": "root",
            "gname": "root",
            "mode": 493,
        }
    }

    template = templates.get(name)
    if template is not None:
        bot.debug("Found template for %s" % name)
        return template
    bot.warning("Cannot find template %s" % name)
def get_layer(self, image_id, repo_name, download_folder=None):
    '''download an image layer (.tar.gz) to a specified download folder.

    Parameters
    ==========
    image_id: the layer digest/id used to name the downloaded file.
    repo_name: the image name (library/ubuntu) to retrieve
    download_folder: download to this folder. If not set, uses temp.

    Returns
    =======
    the path to the downloaded .tar.gz layer.
    '''
    url = self._get_layerLink(repo_name, image_id)
    bot.verbose("Downloading layers from %s" % url)

    download_folder = get_tmpdir(download_folder)
    download_folder = "%s/%s.tar.gz" % (download_folder, image_id)

    # Update user what we are doing
    bot.debug("Downloading layer %s" % image_id)

    # Step 1: Download the layer atomically (download to a temporary
    # name, then move into place).
    # NOTE(review): tempfile._get_candidate_names is a private API —
    # consider a public alternative such as uuid4().
    file_name = "%s.%s" % (download_folder, next(tempfile._get_candidate_names()))
    tar_download = self.download(url, file_name)

    try:
        shutil.move(tar_download, download_folder)
    except Exception:
        # Fixed: the failure here is a move, not an untar; bot.exit
        # replaces bot.error + sys.exit(1) for consistency
        msg = "Cannot move layer %s," % tar_download
        msg += " was there a problem with download?"
        bot.exit(msg)

    return download_folder
def get_manifests(self, repo_name, digest=None):
    '''get_manifests calls get_manifest for each of the schema versions,
    including v2 and v1. Version 1 includes image layers and metadata,
    and version 2 must be parsed for a specific manifest, and the 2nd
    call includes the layers. If a digest is not provided latest is used.

    Parameters
    ==========
    repo_name: reference to the <username>/<repository>:<tag> to obtain
    digest: a tag or shasum version
    '''
    if not hasattr(self, 'manifests'):
        self.manifests = {}

    # Obtain schema version 1 (metadata) and 2, and image config
    schemaVersions = ['v1', 'v2', 'config']
    for schemaVersion in schemaVersions:
        manifest = self._get_manifest(repo_name, digest, schemaVersion)

        if manifest is not None:

            # If we don't have a config yet, try to get from version 2 manifest
            # (typo fix: "verison" -> "version" in the debug message)
            if schemaVersion == "v2" and "config" in manifest:
                bot.debug('Attempting to get config as blob in version 2 manifest')
                url = self._get_layerLink(repo_name, manifest['config']['digest'])
                headers = {'Accept': manifest['config']['mediaType']}
                self.manifests['config'] = self._get(url, headers=headers)

            self.manifests[schemaVersion] = manifest

    return self.manifests
def build(self, repo, config=None, name=None, commit=None, tag="latest", recipe="Singularity", preview=False):
    '''trigger a build on Google Cloud (storage then compute) given a
    recipe and a GitHub URI where the recipe can be found.

    Parameters
    ==========
    repo: should correspond to a Github URL or (if undefined) used local repo.
    config: The local config file to use. If the file doesn't exist, then
            we attempt looking up the config based on the name.
    name: should be the complete uri that the user has requested to push.
    commit: a commit to use, not required, and can be parsed from URI
    tag: a user specified tag, to take preference over tag in name
    recipe: If defined, limit builder to build a single recipe
    preview: when True, return the build configuration instead of running

    Returns
    =======
    the build config when preview is True, otherwise the result of
    self._run_build(config).
    '''
    bot.debug("BUILD %s" % repo)

    # Ensure that repo exists (200 response)
    if not self._healthy(repo):
        sys.exit(1)

    config = self._load_build_config(config)

    # If name not provided, parse name based on repository
    if name is None:
        name = '/'.join(repo.split('/')[-2:])

    # This returns a data structure with collection, container, based on uri
    names = parse_image_name(remove_uri(name))

    # First priority - user has provided a tag
    # NOTE(review): tag defaults to "latest" and is always truthy, so this
    # overwrites any tag parsed from `name` — confirm callers pass
    # tag=None/"" when the uri's own tag should win.
    names['tag'] = tag or names['tag']

    # If we still don't have custom tag, check the recipe
    if names['tag'] == "latest" and recipe != "Singularity":
        tag = get_recipe_tag(recipe)
        names = parse_image_name(remove_uri(name), tag=tag)

    # The commit is the version (after the @)
    commit = commit or names['version']

    # Setup the build
    config = self._setup_build(name=names['url'], recipe=recipe, repo=repo, config=config, tag=tag, commit=commit)

    # The user only wants to preview the configuration
    if preview is True:
        return config

    # Otherwise, run the build!
    return self._run_build(config)
def push(self, path, name, tag=None):
    '''push an image to Google Cloud Storage, meaning uploading it.

    Parameters
    ==========
    path: should correspond to an absolute image path (or derive it)
    name: should be the complete uri that the user has requested to push.
    tag: should correspond with an image tag. This is provided to mirror Docker
    '''
    path = os.path.abspath(path)
    bot.debug("PUSH %s" % path)

    # bot.exit replaces bot.error + sys.exit(1), matching sibling clients
    if not os.path.exists(path):
        bot.exit('%s does not exist.' % path)

    # This returns a data structure with collection, container, based on uri
    names = parse_image_name(remove_uri(name), tag=tag)

    # If no version is provided, derive one from the image hash
    if names['version'] is None:
        version = get_image_hash(path)
        names = parse_image_name(remove_uri(name), tag=tag, version=version)

    # Update metadata with names
    metadata = self.get_metadata(path, names=names)
    metadata = metadata['data']
    metadata.update(names)

    manifest = self._upload(source=path,
                            destination=names['storage'],
                            metadata=metadata)

    print(manifest['mediaLink'])
def push(self, path, name, tag=None):
    '''push an image to an S3 endpoint'''
    path = os.path.abspath(path)
    bot.debug("PUSH %s" % path)
    if not os.path.exists(path):
        bot.exit('%s does not exist.' % path)

    # Parse collection/container fields from the requested uri
    parsed = parse_image_name(remove_uri(name), tag=tag)

    # Size in MB (right shift by 20 bits)
    size_mb = os.path.getsize(path) >> 20

    # Extra metadata identifies the image later.
    # *important* bug in boto3 will return these capitalized
    # see https://github.com/boto/boto3/issues/1709
    upload_args = {
        "Metadata": {
            'sizemb': "%s" % size_mb,
            'client': 'sregistry',
            'type': 'container',
        }
    }

    # Optionally apply a canned ACL from settings
    acl = self._get_and_update_setting('SREGISTRY_S3_OBJECT_ACL')
    if acl is not None:
        upload_args['ACL'] = acl

    try:
        self.bucket.upload_file(path, parsed['storage'], upload_args)
    except botocore.exceptions.ClientError as e:
        bot.exit(
            "Could not upload {} to bucket. Ensure you have sufficient permissions to put objects in the bucket (s3:PutObject), as well as modify the object ACL if SREGISTRY_S3_OBJECT_ACL is set (s3:PutObjectAcl): {}"
            .format(path, str(e)))
def get_cache(subfolder=None, quiet=False):
    '''get_cache will return the user's cache for singularity.

    :param subfolder: a subfolder in the cache base to retrieve, specifically
    :param quiet: suppress the debug message when True
    '''
    # A disabled cache means we work out of a fresh temporary directory
    if convert2boolean(getenv("SINGULARITY_DISABLE_CACHE", default=False)):
        cache_base = tempfile.mkdtemp()
    else:
        # Default to ~/.singularity, overridable via SINGULARITY_CACHEDIR
        home = pwd.getpwuid(os.getuid())[5]
        cache_base = getenv("SINGULARITY_CACHEDIR",
                            default=os.path.join(home, ".singularity"))

    # Clean up the path, append requested subfolder, and create
    cache_base = clean_path(cache_base)
    if subfolder is not None:
        cache_base = "%s/%s" % (cache_base, subfolder)
    mkdir_p(cache_base)

    if not quiet:
        bot.debug("Cache folder set to %s" % cache_base)
    return cache_base
def pull(self, images, file_name=None, save=True, force=False, base=None, **kwargs):
    """Pull one or more images from a docker hub.

    This is a (less than ideal) workaround that creates a sandbox folder,
    adds docker layers, metadata folder, and custom metadata to it, and
    converts to a squashfs image with build. The docker manifests are
    stored with registry metadata.

    Parameters
    ==========
    images: one uri, or list of uris, in <collection>/<namespace> format.
    file_name: the user's requested name for the file (None for default).
    save: if True, save the container to the database using self.add().
    force: passed through to the per-image pull.
    base: the registry base, in case the client doesn't want to set in env.

    Returns
    =======
    finished: a single container path, or list of paths
    """
    if not isinstance(images, list):
        images = [images]

    bot.debug("Execution of PULL for %s images" % len(images))

    finished = []
    for image in images:

        # 0. Update the base in case we aren't working with the default
        registry_base = self._update_base(image)
        parsed = parse_image_name(remove_uri(image), base=registry_base)

        # NOTE(review): kwargs is handed to _pull as a single dict
        # argument (kwargs=kwargs), not expanded — presumably intentional;
        # confirm against _pull's signature.
        finished.append(self._pull(file_name=file_name, save=save,
                                   force=force, names=parsed,
                                   kwargs=kwargs))

    # A single pull returns the bare path rather than a one-item list
    return finished[0] if len(finished) == 1 else finished
def delete(self, url, headers=None, return_json=True, default_headers=True):
    '''Issue a DELETE request against `url`. Use with caution.'''
    bot.debug('DELETE %s' % url)
    call_kwargs = {
        'headers': headers,
        'func': requests.delete,
        'return_json': return_json,
        'default_headers': default_headers,
    }
    return self._call(url, **call_kwargs)
def record(self, images, action='add'): '''record an image from an endpoint. This function is akin to a pull, but without retrieving the image. We only care about the list of images (uris) to look up, and then the action that the user wants to take Parameters ========== images: refers to the uri given by the user to pull in the format <collection>/<namespace>. You should have an API that is able to retrieve metadata for a container based on this url. action: the action to take with the record. By default we add it, meaning adding a record (metadata and file url) to the database. It is recommended to place the URL for the image download under the container.url field, and the metadata (the image manifest) should have a selfLink to indicate where it came from. ''' # Take a look at pull for an example of this logic. if not isinstance(images, list): images = [images] bot.debug('Execution of RECORD[%s] for %s images' % (action, len(images))) for image in images: q = parse_image_name(remove_uri(image)) # Verify image existence, and obtain id url = "..." # This should be some url for your endpoint to get metadata bot.debug('Retrieving manifest at %s' % url) # Get the manifest, add a selfLink to it (good practice) manifest = self._get(url) manifest['selfLink'] = url # versions are very important! Since we aren't downloading the file, # If you don't have a version in your manifest, don't add it to the uri. # you will likely need to customize this string formation to make the # expected uri as in <collection>/<namespace>:<tag>@<version> if manifest['version'] is not None: image_uri = "%s/%s:%s@%s" % (manifest['collection'], manifest['name'], manifest['tag'], manifest['version']) else: image_uri = "%s/%s:%s" % (manifest['collection'], manifest['name'], manifest['tag']) # We again use the "add" function, but we don't give an image path # so it's just added as a record container = self.add(image_name=image_uri, metadata=manifest, url=manifest['image'])
def stream(
    self,
    url,
    headers=None,
    stream_to=None,
    retry=True,
    default_headers=True,
    show_progress=True,
):
    """Stream a GET request for `url` to a file.

    Takes a url and (optionally) a set of headers and a file to stream to,
    and issues requests.get with stream=True.

    Parameters
    ==========
    url: the url to do a requests.get to
    headers: any updated headers to use for the request
    stream_to: the file to stream to
    retry: should the client retry? (intended for use after token refresh)
           by default we retry once after token refresh, then fail.
    show_progress: boolean to show progress bar
    """
    bot.debug("GET %s" % url)

    # Fall back to the client's (possibly freshly reset) default headers
    if headers is None:
        if self.headers is None:
            self._reset_headers()
        headers = self.headers.copy()

    reply = requests.get(url, headers=headers, verify=self._verify(), stream=True)

    # Success: hand off to the internal streaming helper
    if reply.status_code == 200:
        return self._stream(reply, stream_to=stream_to, show_progress=show_progress)

    # Unauthorized: refresh the token (if the client supports it), retry once
    if reply.status_code == 401 and retry is True:
        if hasattr(self, "_update_token"):
            self._update_token(reply)
        return self.stream(url, headers, stream_to, retry=False, show_progress=show_progress)

    bot.exit("Problem with stream, response %s" % reply.status_code)
def put(self, url, headers=None, data=None, return_json=True, default_headers=True):
    """Issue a PUT request against `url` through the client's `_call` helper."""
    bot.debug("PUT %s" % url)
    call_kwargs = {
        "headers": headers,
        "func": requests.put,
        "data": data,
        "return_json": return_json,
        "default_headers": default_headers,
    }
    return self._call(url, **call_kwargs)
def push(self, path, name, tag=None):
    '''push an image to your Storage. If the collection doesn't exist,
    it is created.

    Parameters
    ==========
    path: should correspond to an absolute image path (or derive it)
    name: should be the complete uri that the user has requested to push.
    tag: should correspond with an image tag. This is provided to mirror Docker

    Changes: removed unused locals (chunk_size, storage_path, progress,
    collection return value); bot.exit replaces bot.error + sys.exit(1).
    '''
    path = os.path.abspath(path)
    bot.debug("PUSH %s" % path)

    if not os.path.exists(path):
        bot.exit('%s does not exist.' % path)

    # Parse image names
    names = parse_image_name(remove_uri(name), tag=tag)

    # Get the size of the file (for the progress bar)
    file_size = os.path.getsize(path)

    # Create / get the collection (called for its side effect)
    self._get_or_create_collection(names['collection'])

    # The image name is the name followed by tag
    image_name = os.path.basename(names['storage'])

    # prepare the progress bar
    bot.show_progress(0, file_size, length=35)

    # Put the (actual) container into the collection
    with open(path, 'rb') as F:
        self.conn.put_object(names['collection'],
                             image_name,
                             contents=F.read(),
                             content_type='application/octet-stream')

    # Finish up
    bot.show_progress(iteration=file_size,
                      total=file_size,
                      length=35,
                      carriage_return=True)

    # Newline to finish download
    sys.stdout.write('\n')
def stream(url, headers, stream_to=None, retry=True):
    """stream is a get that will stream to file_name. Since this is a worker
    task, it differs from the client provided version in that it requires
    headers.

    Parameters
    ==========
    url: the url to stream from
    headers: required headers for the request
    stream_to: the file path to write content to
    retry: allow one retry after a token update

    Returns
    =======
    stream_to: the path streamed to, on success
    """
    bot.debug("GET %s" % url)

    if DISABLE_SSL_CHECK is True:
        bot.warning("Verify of certificates disabled! ::TESTING USE ONLY::")

    response = requests.get(url, headers=headers, verify=not DISABLE_SSL_CHECK, stream=True)

    # Deal with token if necessary (one retry only)
    if response.status_code == 401 and retry is True:
        headers = update_token(response, headers)
        return stream(url, headers, stream_to, retry=False)

    if response.status_code == 200:

        # Keep user updated with Progress Bar when the size is known
        content_size = None
        if "Content-Length" in response.headers:
            progress = 0
            content_size = int(response.headers["Content-Length"])
            bot.show_progress(progress, content_size, length=35)

        chunk_size = 1 << 20
        with open(stream_to, "wb") as filey:
            for chunk in response.iter_content(chunk_size=chunk_size):
                filey.write(chunk)
                if content_size is not None:
                    # Fixed: clamp so a final partial chunk cannot push
                    # the bar past 100% of content_size
                    progress = min(progress + chunk_size, content_size)
                    bot.show_progress(
                        iteration=progress,
                        total=content_size,
                        length=35,
                        carriage_return=False,
                    )

        # Newline to finish download
        sys.stdout.write("\n")
        return stream_to

    bot.exit("Problem with stream, response %s" % response.status_code)
def stream(url, headers, stream_to=None, retry=True):
    '''stream is a get that will stream to file_name. Since this is a worker
    task, it differs from the client provided version in that it requires
    headers.

    Parameters
    ==========
    url: the url to stream from
    headers: required headers for the request
    stream_to: the file path to write content to
    retry: allow one retry after a token update

    Returns
    =======
    stream_to: the path streamed to, on success
    '''
    bot.debug("GET %s" % url)

    if DISABLE_SSL_CHECK is True:
        bot.warning('Verify of certificates disabled! ::TESTING USE ONLY::')

    response = requests.get(url, headers=headers, verify=not DISABLE_SSL_CHECK, stream=True)

    # If we get permissions error, one more try with updated token.
    # Bug fix: the original never consulted `retry`, so a second 401/403
    # would recurse forever; now a failed retry falls through to bot.exit.
    if response.status_code in [401, 403] and retry is True:
        headers = update_token(headers)
        return stream(url, headers, stream_to, retry=False)

    # Successful Response
    elif response.status_code == 200:

        # Keep user updated with Progress Bar when the size is known
        content_size = None
        if 'Content-Length' in response.headers:
            progress = 0
            content_size = int(response.headers['Content-Length'])
            bot.show_progress(progress, content_size, length=35)

        chunk_size = 1 << 20
        with open(stream_to, 'wb') as filey:
            for chunk in response.iter_content(chunk_size=chunk_size):
                filey.write(chunk)
                if content_size is not None:
                    # Clamp so a final partial chunk cannot overshoot
                    progress = min(progress + chunk_size, content_size)
                    bot.show_progress(iteration=progress,
                                      total=content_size,
                                      length=35,
                                      carriage_return=False)

        # Newline to finish download
        sys.stdout.write('\n')
        return stream_to

    # bot.exit replaces bot.error + sys.exit(1) for consistency
    bot.exit("Problem with stream, response %s" % (response.status_code))
def record(self, images, action='add'):
    '''record an image from an endpoint. This function is akin to a pull,
    but without retrieving the image. We only care about the list of images
    (uris) to look up, and then the action that the user wants to take.

    Parameters
    ==========
    images: one uri, or list of uris, in <collection>/<namespace> format.
    action: the action to take with the record. By default we add it, meaning
            adding a record (metadata and file url) to the database.

    Changes: removed the unused `container` local.
    '''
    # Take a look at pull for an example of this logic.
    if not isinstance(images, list):
        images = [images]

    bot.debug('Execution of RECORD[%s] for %s images' % (action, len(images)))

    for image in images:
        q = parse_image_name(remove_uri(image))

        # Use container search to find the container based on uri
        bot.info('Searching for %s in gs://%s' % (q['uri'], self._bucket_name))
        matches = self._container_query(q['uri'], quiet=True)

        if len(matches) == 0:
            bot.info('No matching containers found.')
            sys.exit(0)

        # We give the first match, the uri should be unique and known
        image = matches[0]
        image_uri = q['uri']
        if "uri" in image.metadata:
            image_uri = image.metadata['uri']

        # Update metadata with selfLink
        metadata = image.metadata
        metadata['selfLink'] = image.self_link

        # Use add without an image path, so it's added only as a record
        self.add(image_uri=image_uri, metadata=metadata, url=image.media_link)
def delete(self, image, force=False):
    """delete an image from Google Storage.

    Parameters
    ==========
    image: the name of the file (or image) to delete
    force: when True, skip the per-file confirmation prompt
    """
    bot.debug("DELETE %s" % image)
    for file_object in self._container_query(image):
        if confirm_delete(file_object.name, force):
            file_object.delete()
def _update_secrets(self):
    '''create the AWS ECR client for this sregistry client.

    Fixed: the docstring previously described Docker Hub secrets handling
    that this body does not perform; the bare `except:` (which would also
    swallow KeyboardInterrupt/SystemExit) is narrowed to ImportError.
    '''
    bot.debug('Creating aws client...')
    try:
        from awscli.clidriver import create_clidriver
    except ImportError:
        bot.exit('Please install pip install sregistry[aws]')
    driver = create_clidriver()
    self.aws = driver.session.create_client('ecr')
def post(self, url, headers=None, data=None, return_json=True, default_headers=True):
    '''Issue a POST request against `url` through the client's `_call` helper.'''
    bot.debug("POST %s" % url)
    call_kwargs = {
        'headers': headers,
        'func': requests.post,
        'data': data,
        'return_json': return_json,
        'default_headers': default_headers,
    }
    return self._call(url, **call_kwargs)
def update_headers(self, fields=None):
    '''update headers with a token & other fields.

    Parameters
    ==========
    fields: optional dict of header key/value pairs to set.

    Idiom fix: the hasattr/is-not-None dance is replaced with a single
    getattr check; behavior is unchanged (headers are reset only when
    absent or None).
    '''
    if getattr(self, 'headers', None) is None:
        self._reset_headers()

    if fields is not None:
        for key, value in fields.items():
            self.headers[key] = value

    header_names = ",".join(list(self.headers.keys()))
    bot.debug("Headers found: %s" % header_names)
def get(self, url, headers=None, token=None, data=None, return_json=True, default_headers=True, quiet=False):
    '''Issue a GET request against `url` through the client's `_call` helper.

    Note: `token` is accepted for signature compatibility but is not
    referenced in this body.
    '''
    bot.debug("GET %s" % url)
    call_kwargs = {
        'headers': headers,
        'func': requests.get,
        'data': data,
        'return_json': return_json,
        'default_headers': default_headers,
        'quiet': quiet,
    }
    return self._call(url, **call_kwargs)
def get_build_template(name="singularity-cloudbuild-local.json"):
    """get default build template.

    Parameters
    ==========
    name: singularity-cloudbuild-local.json (default) that will build a
          container interactively, waiting for the build to finish.
          singularity-cloudbuild-git.json build a recipe from a GitHub
          repository.
    """
    name = "%s/main/templates/build/%s" % (get_installdir(), name)
    if os.path.exists(name):
        bot.debug("Found template %s" % name)
        return read_json(name)
    bot.warning("Template %s not found." % name)
def get_build_template(name=None, manager="apt"):
    """get a particular build template, by default we return templates
    that are based on package managers.

    Parameters
    ==========
    name: the full path of the template file to use.
    manager: the package manager to use in the template (yum or apt)
    """
    # Without an explicit path, derive the builder script for the manager
    if name is None:
        name = "%s/main/templates/build/singularity-builder-%s.sh" % (
            get_installdir(), manager)

    if os.path.exists(name):
        bot.debug("Found template %s" % name)
        return "".join(read_file(name))
    bot.warning("Template %s not found." % name)
def init_db(self, db_path): '''initialize the database, with the default database path or custom of the format sqlite:////home/<username>/sregistry.db The custom path can be set with the environment var SREGISTRY_DATABASE when a user creates the client, we must initialize this db the database should use the .singularity cache folder to cache layers and images, and .singularity/sregistry.db as a database ''' # Database Setup, use default if uri not provided self.database = 'sqlite:///%s' % db_path self.storage = SREGISTRY_STORAGE # If the path isn't defined, cut out early if not db_path: return # Ensure that the parent_folder exists) parent_folder = os.path.dirname(db_path) # Case 1: Does not exist if not os.path.exists(parent_folder): bot.exit("Database location {} does not exist.".format(parent_folder)) # Case 2: Insufficient permission for write if not os.access(parent_folder, os.W_OK): bot.exit( "Insufficient permission to write to {}".format(parent_folder)) bot.debug("Database located at %s" % self.database) self.engine = create_engine(self.database, convert_unicode=True) self.session = scoped_session( sessionmaker(autocommit=False, autoflush=False, bind=self.engine)) Base.query = self.session.query_property() # import all modules here that might define models so that # they will be registered properly on the metadata. Otherwise # you will have to import them first before calling init_db() Base.metadata.create_all(bind=self.engine) self.Base = Base