def pull(cli):
    """Pull the image named by cli.image_ref and unpack it.

    Reads cli.storage, cli.image_dir, cli.image_ref, cli.parse_only,
    cli.no_cache, and cli.last_layer. If cli.parse_only is set, the parsed
    image reference is printed and the process exits with status 0 before
    anything is downloaded."""
    ch.dependencies_check()

    # Where does it go? An explicit image directory is used as-is, with an
    # empty subdirectory; otherwise images go under the storage directory
    # and the subdirectory is left as None (infer from image ref).
    dlcache = cli.storage + "/dlcache"
    explicit_dest = (cli.image_dir is not None)
    if explicit_dest:
        (unpack_dir, image_subdir) = (cli.image_dir, "")
    else:
        (unpack_dir, image_subdir) = (cli.storage + "/img", None)

    # Set things up.
    ref = ch.Image_Ref(cli.image_ref)
    if cli.parse_only:
        print(ref.as_verbose_str)
        sys.exit(0)
    image = ch.Image(ref, dlcache, unpack_dir, image_subdir)
    use_cache = (not cli.no_cache)

    # The destination is only worth an INFO line when the user chose it.
    ch.INFO("pulling image: %s" % image.ref)
    log_destination = ch.INFO if explicit_dest else ch.DEBUG
    log_destination("destination: %s" % image.unpack_path)
    ch.DEBUG("use cache: %s" % use_cache)
    ch.DEBUG("download cache: %s" % image.download_cache)
    ch.DEBUG("manifest: %s" % image.manifest_path)

    # Pull!
    image.pull_to_unpacked(use_cache=use_cache, last_layer=cli.last_layer)

    # Done.
    ch.INFO("done")
def config(img):
    """Return the first fakeroot config matching image directory img.

    Each entry of DEFAULT_CONFIGS carries a "match" pair (path relative to
    the image, regular expression) and a "config" dict. An entry matches
    when that path is a regular file in the image and ch.grep_p() reports a
    match for the expression. Return the matching entry's "config" dict, or
    None if no entry matches."""
    ch.DEBUG("fakeroot: checking configs: %s" % img)
    for candidate in DEFAULT_CONFIGS:
        (rel_path, pattern) = candidate["match"]
        target = "%s/%s" % (img, rel_path)
        ch.DEBUG("fakeroot: checking %s: grep '%s' %s"
                 % (candidate["config"]["name"], pattern, rel_path))
        if not os.path.isfile(target):
            continue
        if not ch.grep_p(target, pattern):
            continue
        ch.DEBUG("fakeroot: using config %s" % candidate["config"]["name"])
        return candidate["config"]
    ch.DEBUG("fakeroot: no config found")
    return None
def prepare(self):
    """Prepare self.image for pushing to self.dst_ref.

    Nothing is returned; the results are stored on self for the next steps:

      self.layers    list of (hash, path) tuples, one per gzipped layer
                     tarball, in layer order
      self.config    config as a sequence of bytes (UTF-8 encoded JSON)
      self.manifest  manifest as a sequence of bytes (UTF-8 encoded JSON)

    There is not currently any support for re-using any previously prepared
    files already in the upload cache, because we don't yet have a way to
    know if these have changed until they are already built."""
    ch.mkdirs(ch.storage.upload_cache)
    tars_uc = self.image.tarballs_write(ch.storage.upload_cache)
    tars_c = list()
    config = self.config_new()
    manifest = self.manifest_new()
    layer_ct = len(tars_uc)

    # Prepare layers. The config lists the hash of each *uncompressed*
    # tarball (diff_ids), while the manifest lists the hash and size of the
    # *compressed* one.
    for (i, tar_uc) in enumerate(tars_uc, start=1):
        ch.INFO("layer %d/%d: preparing" % (i, layer_ct))
        path_uc = ch.storage.upload_cache // tar_uc
        hash_uc = ch.file_hash(path_uc)
        config["rootfs"]["diff_ids"].append("sha256:" + hash_uc)
        path_c = ch.file_gzip(path_uc, ["-9", "--no-name"])
        hash_c = ch.file_hash(path_c)
        size_c = ch.file_size(path_c)
        tars_c.append((hash_c, path_c))
        manifest["layers"].append({"mediaType": ch.TYPE_LAYER,
                                   "size": size_c,
                                   "digest": "sha256:" + hash_c})

    # Prepare metadata. The manifest refers to the config by size and
    # digest, so the config must be serialized first.
    ch.INFO("preparing metadata")
    config_bytes = json.dumps(config, indent=2).encode("UTF-8")
    config_hash = ch.bytes_hash(config_bytes)
    manifest["config"]["size"] = len(config_bytes)
    manifest["config"]["digest"] = "sha256:" + config_hash
    ch.DEBUG("config: %s\n%s" % (config_hash, config_bytes.decode("UTF-8")))
    manifest_bytes = json.dumps(manifest, indent=2).encode("UTF-8")
    ch.DEBUG("manifest:\n%s" % manifest_bytes.decode("UTF-8"))

    # Store for the next steps.
    self.layers = tars_c
    self.config = config_bytes
    self.manifest = manifest_bytes
def execute_(self):
    """Start a new build stage from self.base_ref.

    Advances the module-level stage counter image_i and records the stage
    alias in image_alias, registers the new stage's image in images (by
    index and, if given, by alias), pulls the base image if it is not
    already unpacked, copies it into the new image, resets the build
    environment, and injects the fakeroot preparation unless
    cli.no_fakeroot is set."""
    global image_i, image_alias

    # Complain about unsupported stuff.
    if self.options.pop("platform", False):
        self.unsupported_yet_fatal("--platform", 778)
    # Any remaining options are invalid.
    self.options_assert_empty()

    # Update image globals.
    image_i += 1
    image_alias = self.alias

    # Choose the tag for this stage.
    final_i = image_ct - 1
    if image_i > final_i:
        # Too many images!
        ch.FATAL("expected %d stages but found at least %d"
                 % (image_ct, image_i + 1))
    elif image_i == final_i:
        # Last image; use tag unchanged.
        tag = cli.tag
    else:
        # Not last image; append stage index to tag.
        tag = "%s/_stage%d" % (cli.tag, image_i)

    # Create the stage image and register it under index and alias.
    image = ch.Image(ch.Image_Ref(tag),
                     cli.storage + "/dlcache",
                     cli.storage + "/img")
    images[image_i] = image
    if self.alias is not None:
        images[self.alias] = image
    ch.DEBUG("image path: %s" % image.unpack_path)

    # Other error checking.
    if str(image.ref) == str(self.base_ref):
        ch.FATAL("output image ref same as FROM: %s" % self.base_ref)

    # Initialize image.
    base = ch.Image(self.base_ref, image.download_cache, image.unpack_dir)
    self.base_image = base
    if not os.path.isdir(base.unpack_path):
        ch.DEBUG("image not found, pulling: %s" % base.unpack_path)
        base.pull_to_unpacked(fixup=True)
    image.copy_unpacked(base)
    env.reset()

    # Inject fakeroot preparatory stuff if needed.
    if not cli.no_fakeroot:
        fakeroot.inject_first(image.unpack_path, env.env_build)
def inject_first(img, env):
    """Run the one-time fakeroot initialization commands in image img.

    img is the path of the unpacked image and env is passed through to
    ch.ch_run_modify() for each command. Do nothing if no fakeroot config
    matches the image, or if the marker file ch/fakeroot-first-run already
    exists inside the image."""
    c = config(img)
    if c is None:
        return
    # The marker path must be interpolated with the image path; testing the
    # un-formatted literal "%s/ch/fakeroot-first-run" could never detect an
    # already-initialized image.
    if os.path.exists("%s/ch/fakeroot-first-run" % img):
        ch.DEBUG("fakeroot: already initialized")
        return
    ch.INFO("fakeroot: initializing for %s" % c["name"])
    for cl in c["first"]:
        ch.INFO("fakeroot: $ %s" % cl)
        args = ["/bin/sh", "-c", cl]
        ch.ch_run_modify(img, args, env)
def copy_src_file(self, src, dst):
    """Copy file src, named by COPY either explicitly or with wildcards, to
    dst.

    src might be a symlink, but dst is a canonical path. Both must be at
    the top level of the COPY instruction; i.e., this function must not be
    called recursively. If dst is a directory, the file goes into that
    directory under src's name (i.e., the directory creation magic has
    already happened)."""
    assert os.path.isfile(src)
    # dst must either not exist yet, or be a real (non-symlink) directory
    # or regular file.
    dst_acceptable = (not os.path.exists(dst)
                      or (not os.path.islink(dst)
                          and (os.path.isdir(dst) or os.path.isfile(dst))))
    assert dst_acceptable
    ch.DEBUG("copying named file: %s -> %s" % (src, dst))
    ch.copy2(src, dst, follow_symlinks=True)
def manifest_load(self):
    """Parse the manifest file at self.manifest_path and set
    self.config_hash and self.layer_hashes.

    Schema versions 1 and 2 are accepted. For version 1, self.config_hash
    is set to None and the layer list is reversed after loading."""

    def bad_key(key):
        ch.FATAL("manifest: %s: no key: %s" % (self.manifest_path, key))

    # Read and parse the JSON.
    fp = ch.open_(self.manifest_path, "rt", encoding="UTF-8")
    text = ch.ossafe(fp.read, "can't read: %s" % self.manifest_path)
    ch.ossafe(fp.close, "can't close: %s" % self.manifest_path)
    ch.DEBUG("manifest:\n%s" % text)
    try:
        manifest = json.loads(text)
    except json.JSONDecodeError as x:
        ch.FATAL("can't parse manifest file: %s:%d: %s"
                 % (self.manifest_path, x.lineno, x.msg))

    # Validate schema version.
    try:
        version = manifest['schemaVersion']
    except KeyError:
        bad_key("schemaVersion")
    if version not in {1, 2}:
        ch.FATAL("unsupported manifest schema version: %s" % repr(version))

    # Load the config hash and pick the layer keys for this version.
    #
    # FIXME: Manifest version 1 does not list a config blob. It does have
    # things (plural) that look like a config at history/v1Compatibility as
    # an embedded JSON string :P but I haven't dug into it.
    if version == 1:
        ch.WARNING("no config; manifest schema version 1")
        self.config_hash = None
        (layers_key, digest_key) = ("fsLayers", "blobSum")
    else:  # version == 2
        try:
            self.config_hash = ch.digest_trim(manifest["config"]["digest"])
        except KeyError:
            bad_key("config/digest")
        (layers_key, digest_key) = ("layers", "digest")

    # Load layer hashes.
    if layers_key not in manifest:
        bad_key(layers_key)
    self.layer_hashes = list()
    for layer in manifest[layers_key]:
        if digest_key not in layer:
            bad_key("%s/%s" % (layers_key, digest_key))
        self.layer_hashes.append(ch.digest_trim(layer[digest_key]))
    if version == 1:
        self.layer_hashes.reverse()
def main(cli_):
    """Build an image from a Dockerfile as directed by command line cli_.

    cli_ is the parsed command line namespace; it is stored in the
    module-level global cli. Steps, in order: infer the input file and
    image tag if not given, turn the --build-arg list into a dict, refuse
    URL contexts, read and parse the Dockerfile, then walk the parse tree
    with Main_Loop to execute the instructions. Also sets the module-level
    globals env and image_ct.

    Exits with status 0 after parsing if cli.parse_only is set. Problems
    are reported with ch.FATAL()."""
    # CLI namespace. :P
    global cli, env, image_ct
    cli = cli_

    # Infer input file if needed.
    if cli.file is None:
        cli.file = cli.context + "/Dockerfile"

    # Infer image name if needed.
    if cli.tag is None:
        m = re.search(r"(([^/]+)/)?Dockerfile(\.(.+))?$",
                      os.path.abspath(cli.file))
        if m is not None:
            if m.group(4):    # extension
                cli.tag = m.group(4)
            elif m.group(2):  # containing directory
                cli.tag = m.group(2)

    # Deal with build arguments.
    def build_arg_get(arg):
        # Split on the first "=" only, so values may themselves contain "=".
        # An argument with no "=" at all takes its value from the
        # environment.
        (key, sep, value) = arg.partition("=")
        if sep:
            return (key, value)
        value = os.getenv(key)
        if value is None:
            ch.FATAL("--build-arg: %s: no value and not in environment" % key)
        return (key, value)
    if cli.build_arg is None:
        cli.build_arg = list()
    cli.build_arg = dict(build_arg_get(i) for i in cli.build_arg)

    # Finish CLI initialization.
    ch.DEBUG(cli)
    ch.dependencies_check()

    # Guess whether the context is a URL, and error out if so. This can be a
    # typical looking URL e.g. "https://..." or also something like
    # "git@github.com:...". The line noise in the second line of the regex is
    # to match this second form. Username and host characters from
    # https://tools.ietf.org/html/rfc3986.
    #
    # The "+" in "git+ssh" must be escaped; unescaped it is a quantifier and
    # the alternative would match "gitssh" rather than "git+ssh".
    if (re.search(r"""  ^((git|git\+ssh|http|https|ssh)://
                      | ^[\w.~%!$&'\(\)\*\+,;=-]+@[\w.~%!$&'\(\)\*\+,;=-]+:)""",
                  cli.context, re.VERBOSE) is not None):
        ch.FATAL("not yet supported: issue #773: URL context: %s"
                 % cli.context)
    if os.path.exists(cli.context + "/.dockerignore"):
        ch.WARNING("not yet supported, ignored: issue #777: .dockerignore file")

    # Set up build environment.
    env = Environment()

    # Read input file.
    if cli.file == "-":
        text = ch.ossafe(sys.stdin.read, "can't read stdin")
    else:
        fp = ch.open_(cli.file, "rt")
        text = ch.ossafe(fp.read, "can't read: %s" % cli.file)
        ch.ossafe(fp.close, "can't close: %s" % cli.file)

    # Parse it.
    parser = lark.Lark("?start: dockerfile\n" + ch.GRAMMAR,
                       parser="earley", propagate_positions=True)
    # Avoid Lark issue #237: lark.exceptions.UnexpectedEOF if the file does
    # not end in newline.
    text += "\n"
    try:
        tree = parser.parse(text)
    except lark.exceptions.UnexpectedInput as x:
        ch.DEBUG(x)  # noise about what was expected in the grammar
        ch.FATAL("can't parse: %s:%d,%d\n\n%s"
                 % (cli.file, x.line, x.column, x.get_context(text, 39)))
    ch.DEBUG(tree.pretty())

    # Sometimes we exit after parsing.
    if cli.parse_only:
        sys.exit(0)

    # Count the number of stages (i.e., FROM instructions).
    image_ct = sum(1 for i in ch.tree_children(tree, "from_"))

    # Traverse the tree and do what it says.
    #
    # We don't actually care whether the tree is traversed breadth-first or
    # depth-first, but we *do* care that instruction nodes are visited in
    # order. Neither visit() nor visit_topdown() are documented as of
    # 2020-06-11 [1], but examining source code [2] shows that
    # visit_topdown() uses Tree.iter_trees_topdown(), which *is* documented
    # to be in-order [3].
    #
    # This change seems to have been made in 0.8.6 (see PR #761); before
    # then, visit() was in order. Therefore, we call that instead, if
    # visit_topdown() is not present, to improve compatibility (see issue
    # #792).
    #
    # [1]: https://lark-parser.readthedocs.io/en/latest/visitors/#visitors
    # [2]: https://github.com/lark-parser/lark/blob/445c8d4/lark/visitors.py#L211
    # [3]: https://lark-parser.readthedocs.io/en/latest/classes/#tree
    ml = Main_Loop()
    if hasattr(ml, 'visit_topdown'):
        ml.visit_topdown(tree)
    else:
        ml.visit(tree)

    # Check that all build arguments were consumed.
    if len(cli.build_arg) != 0:
        ch.FATAL("--build-arg: not consumed: "
                 + " ".join(cli.build_arg.keys()))

    # Print summary & we're done.
    if ml.instruction_ct == 0:
        ch.FATAL("no instructions found: %s" % cli.file)
    assert (image_i + 1 == image_ct)  # should have errored already if not
    ch.INFO("grown in %d instructions: %s"
            % (ml.instruction_ct, images[image_i]))
def execute_(self):
    """Execute a COPY instruction.

    Sources (self.srcs) are glob patterns relative to the context, which is
    cli.context, or, when self.from_ is set, the unpacked path of that
    earlier stage's image. The destination (self.dst) is interpreted inside
    the current stage's image; if it is not absolute, it is taken relative
    to env.workdir.

    The destination is treated as a directory (and created if needed) when
    it ends in a slash, when there is more than one source, or when the
    first source is a directory. For a source directory, its *contents* are
    copied rather than the directory itself.

    Problems are reported with ch.FATAL(): a source that climbs outside the
    context (lexically or via symlink), no existing sources, or an unusable
    destination."""
    # Complain about unsupported stuff.
    if self.options.pop("chown", False):
        self.unsupported_forever_warn("--chown")
    # Any remaining options are invalid.
    self.options_assert_empty()

    # Find the source directory.
    if self.from_ is None:
        context = cli.context
    else:
        if self.from_ == image_i or self.from_ == image_alias:
            ch.FATAL("COPY --from: stage %s is the current stage"
                     % self.from_)
        if self.from_ not in images:
            # FIXME: Would be nice to also report if a named stage is below.
            if isinstance(self.from_, int) and self.from_ < image_ct:
                if self.from_ < 0:
                    ch.FATAL("COPY --from: invalid negative stage index %d"
                             % self.from_)
                else:
                    ch.FATAL("COPY --from: stage %d does not exist yet"
                             % self.from_)
            else:
                ch.FATAL("COPY --from: stage %s does not exist" % self.from_)
        context = images[self.from_].unpack_path
    ch.DEBUG("context: " + context)

    # Expand the sources.
    srcs = list()
    for src in self.srcs:
        # Only a leading ".." path *component* climbs out of the context; a
        # plain prefix test would also reject legitimate names like
        # "..foo".
        src_norm = os.path.normpath(src)
        if src_norm == ".." or src_norm.startswith("../"):
            ch.FATAL("can't COPY: %s climbs outside context" % src)
        srcs.extend(glob.glob(context + "/" + src))
    if len(srcs) == 0:
        ch.FATAL("can't COPY: no sources exist")

    # Work out the destination.
    dst = images[image_i].unpack_path + "/"
    if not self.dst.startswith("/"):
        dst += env.workdir + "/"
    dst += self.dst
    if dst.endswith("/") or len(srcs) > 1 or os.path.isdir(srcs[0]):
        # Create destination directory.
        if dst.endswith("/"):
            dst = dst[:-1]
        if os.path.exists(dst) and not os.path.isdir(dst):
            ch.FATAL("can't COPY: %s exists but is not a directory" % dst)
        ch.mkdirs(dst)

    # The resolved context is the same for every source.
    context_real = os.path.realpath(context)
    for src in srcs:
        # Check for symlinks to outside context.
        src_real = os.path.realpath(src)
        if (not os.path.commonpath([src_real, context_real])
                       .startswith(context_real)):
            ch.FATAL("can't COPY: %s climbs outside context via symlink"
                     % src)
        # Do the copy.
        if os.path.isfile(src):   # or symlink to file
            ch.DEBUG("COPY via copy2 file %s to %s" % (src, dst))
            ch.copy2(src, dst, follow_symlinks=True)
        elif os.path.isdir(src):  # or symlink to directory
            # Copy *contents* of src, not src itself. Note: shutil.copytree()
            # has a parameter dirs_exist_ok that I think will make this
            # easier in Python 3.8.
            ch.DEBUG("COPY dir %s to %s" % (src, dst))
            if not os.path.isdir(dst):
                ch.FATAL("can't COPY: destination not a directory: %s to %s"
                         % (src, dst))
            for src2_basename in ch.ossafe(
                    os.listdir, "can't list directory: %s" % src, src):
                src2 = src + "/" + src2_basename
                if os.path.islink(src2):
                    # Symlinks within directories do not get dereferenced.
                    ch.DEBUG("symlink via copy2: %s to %s" % (src2, dst))
                    ch.copy2(src2, dst, follow_symlinks=False)
                elif os.path.isfile(src2):  # not symlink to file
                    ch.DEBUG("file via copy2: %s to %s" % (src2, dst))
                    ch.copy2(src2, dst)
                elif os.path.isdir(src2):   # not symlink to directory
                    dst2 = dst + "/" + src2_basename
                    ch.DEBUG("directory via copytree: %s to %s"
                             % (src2, dst2))
                    ch.copytree(src2, dst2, symlinks=True,
                                ignore_dangling_symlinks=True)
                else:
                    ch.FATAL("can't COPY unknown file type: %s" % src2)
        else:
            ch.FATAL("can't COPY unknown file type: %s" % src)
def manifest_load(self, continue_404=False):
    """Download the manifest file if needed, parse it, and set
    self.config_hash and self.layer_hashes.

    By default, if the image does not exist, exit with error; if
    continue_404, then log the condition but do not exit. In this case,
    self.config_hash and self.layer_hashes will both be None."""

    def bad_key(key):
        ch.FATAL("manifest: %s: no key: %s" % (self.manifest_path, key))

    self.config_hash = None
    self.layer_hashes = None

    # Obtain the manifest.
    try:
        # Internal manifest library, e.g. for "FROM scratch".
        manifest = manifests_internal[str(self.image.ref)]
        ch.INFO("manifest: using internal library")
    except KeyError:
        # Not in the internal library: download the file if needed, then
        # parse it.
        no_digest = (ch.arch == "yolo" or self.architectures is None)
        digest = None if no_digest else self.architectures[ch.arch]
        ch.DEBUG("manifest digest: %s" % digest)
        if os.path.exists(self.manifest_path) and self.use_cache:
            ch.INFO("manifest: using existing file")
        else:
            ch.INFO("manifest: downloading")
            self.registry.manifest_to_file(self.manifest_path,
                                           digest=digest,
                                           continue_404=continue_404)
        if not os.path.exists(self.manifest_path):
            # Response was 404 (or equivalent).
            ch.INFO("manifest: none found")
            return
        manifest = ch.json_from_file(self.manifest_path, "manifest")

    # Validate schema version.
    try:
        version = manifest['schemaVersion']
    except KeyError:
        bad_key("schemaVersion")
    if version not in {1, 2}:
        ch.FATAL("unsupported manifest schema version: %s" % repr(version))

    # Load the config hash and pick the layer keys for this version.
    #
    # FIXME: Manifest version 1 does not list a config blob. It does have
    # things (plural) that look like a config at history/v1Compatibility as
    # an embedded JSON string :P but I haven't dug into it.
    if version == 1:
        ch.VERBOSE("no config; manifest schema version 1")
        self.config_hash = None
        (layers_key, digest_key) = ("fsLayers", "blobSum")
    else:  # version == 2
        try:
            digest_raw = manifest["config"]["digest"]
            self.config_hash = digest_raw
            if digest_raw is not None:
                self.config_hash = ch.digest_trim(digest_raw)
        except KeyError:
            bad_key("config/digest")
        (layers_key, digest_key) = ("layers", "digest")

    # Load layer hashes.
    if layers_key not in manifest:
        bad_key(layers_key)
    self.layer_hashes = list()
    for layer in manifest[layers_key]:
        if digest_key not in layer:
            bad_key("%s/%s" % (layers_key, digest_key))
        self.layer_hashes.append(ch.digest_trim(layer[digest_key]))
    if version == 1:
        self.layer_hashes.reverse()
def copy_src_dir(self, src, dst):
    """Copy the contents of directory src, named by COPY either explicitly
    or with wildcards, to dst.

    src might be a symlink, but dst is a canonical path. Both must be at
    the top level of the COPY instruction; i.e., this function must not be
    called recursively. dst must exist already and be a directory. Unlike
    subdirectories, the metadata of dst will not be altered to match src.

    Within the tree, symlinks are copied as symlinks (never followed), and
    anything already at a destination path that is in the way (including a
    dangling symlink) is removed first. A source entry that is neither a
    regular file, a symlink, nor a directory is a fatal error."""

    def onerror(x):
        ch.FATAL("can't scan directory: %s: %s" % (x.filename, x.strerror))

    # Use Path objects in this method because the path arithmetic was
    # getting too hard with strings.
    src = ch.Path(os.path.realpath(src))
    dst = ch.Path(dst)
    assert (os.path.isdir(src) and not os.path.islink(src))
    assert (os.path.isdir(dst) and not os.path.islink(dst))
    ch.DEBUG("copying named directory: %s -> %s" % (src, dst))
    for (dirpath, dirnames, filenames) in os.walk(src, onerror=onerror):
        dirpath = ch.Path(dirpath)
        subdir = dirpath.relative_to(src)
        dst_dir = dst // subdir
        # dirnames can contain symlinks, which we handle as files, so we'll
        # rebuild it; the walk will not descend into those "directories".
        dirnames2 = dirnames.copy()  # shallow copy
        dirnames[:] = list()         # clear in place
        for d in dirnames2:
            d = ch.Path(d)
            src_path = dirpath // d
            dst_path = dst_dir // d
            ch.TRACE("dir: %s -> %s" % (src_path, dst_path))
            if os.path.islink(src_path):
                filenames.append(d)  # symlink, handle as file
                ch.TRACE("symlink to dir, will handle as file")
                continue
            dirnames.append(d)       # directory, descend into later
            # If destination exists, but isn't a directory, remove it. Use
            # lexists() so a dangling symlink counts as existing; otherwise
            # it would survive to make the mkdir below fail.
            if os.path.lexists(dst_path):
                if os.path.isdir(dst_path) and not os.path.islink(dst_path):
                    ch.TRACE("dst_path exists and is a directory")
                else:
                    ch.TRACE("dst_path exists, not a directory, removing")
                    ch.unlink(dst_path)
            # If destination directory doesn't exist, create it.
            if not os.path.lexists(dst_path):
                ch.TRACE("mkdir dst_path")
                ch.ossafe(os.mkdir, "can't mkdir: %s" % dst_path, dst_path)
            # Copy metadata, now that we know the destination exists and is
            # a directory.
            ch.ossafe(shutil.copystat,
                      "can't copy metadata: %s -> %s" % (src_path, dst_path),
                      src_path, dst_path, follow_symlinks=False)
        for f in filenames:
            f = ch.Path(f)
            src_path = dirpath // f
            dst_path = dst_dir // f
            ch.TRACE("file or symlink via copy2: %s -> %s"
                     % (src_path, dst_path))
            if not (os.path.isfile(src_path) or os.path.islink(src_path)):
                ch.FATAL("can't COPY: unknown file type: %s" % src_path)
            # lexists() again: a dangling symlink at the destination must be
            # removed, or the copy would either fail or write through it.
            if os.path.lexists(dst_path):
                ch.TRACE("destination exists, removing")
                if os.path.isdir(dst_path) and not os.path.islink(dst_path):
                    ch.rmtree(dst_path)
                else:
                    ch.unlink(dst_path)
            ch.copy2(src_path, dst_path, follow_symlinks=False)