def record_progress(self, build: Build, content: str, layer_id: str, build_id: str = None) -> Tuple[str, str]:
    """
    Store one step of the build (a layer) in the build object and in the database.

    :param build: Build instance to update; replaced by the database copy when build_id is set
    :param content: str or None, content the layer was created from
    :param layer_id: str or None; a falsy value means the task was skipped and the
                     layer has to be looked up via self.get_layer
    :param build_id: str or None, ID of the build to load from the database
    :return: (base_image_id, layer_id), or (None, None) when no usable layer is available
    """
    if build_id:
        build = self.db.get_build(build_id)
    base_image_id = build.get_top_layer_id()

    # no layer ID was handed over: the only way to proceed is a layer found in the cache
    from_cache = not layer_id
    if from_cache:
        # skipped task, it was cached
        if content:
            layer_id = self.get_layer(content, base_image_id)
            if not self.get_builder(build).is_image_present(layer_id):
                logger.info("layer %s for content %s does not exist", layer_id, content)
                layer_id = None
        if not layer_id:
            return None, None

    build.record_layer(content, layer_id, base_image_id, cached=from_cache)
    self.db.record_build(build)
    return base_image_id, layer_id
def __init__(self, playbook_path):
    """
    Remember the playbook and prepare a fresh Build with a fresh ImageMetadata attached.

    :param playbook_path: str, path to playbook
    """
    build = Build()
    metadata = ImageMetadata()
    build.metadata = metadata

    self.playbook_path = playbook_path
    self.build = build
    self.metadata = metadata
def test_find_py_intrprtr_in_fedora_image(image_name, found):
    """
    Look up a python interpreter in the given image.

    A RuntimeError raised by the lookup is tolerated only when `found` is falsy.
    """
    build = Build()
    build.base_image = image_name
    build.target_image = "starena"
    builder = BuildahBuilder(build)

    try:
        interpreter = builder.find_python_interpreter()
    except RuntimeError:
        if not found:
            return
        # interpreter should have been found
        raise
    assert interpreter
def build(target_image):
    """
    Create a new Build preconfigured for the basic playbook and the buildah builder.

    :param target_image: str, name of the image to be built
    :return: instance of Build
    """
    new_build = Build()
    settings = {
        "debug": True,
        "playbook_path": basic_playbook_path,
        "base_image": base_image,
        "target_image": target_image,
        "metadata": ImageMetadata(),
        "state": BuildState.NEW,
        "builder_name": "buildah",  # test with all builders
    }
    # dicts keep insertion order, so attributes are set in the order listed above
    for attribute, value in settings.items():
        setattr(new_build, attribute, value)
    return new_build
def test_buildah_sanity_check_extra_args(caplog):
    """
    Make sure buildah_from_extra_args are passed to the sanity check.

    "--help" is used as the extra argument: the sanity check is expected to fail with
    CalledProcessError and the help text has to show up in the captured log records.
    """
    set_logging(level=logging.DEBUG)
    build = Build()
    build.base_image = base_image
    build.buildah_from_extra_args = "--help"
    b = BuildahBuilder(build, debug=True)
    b.ansible_host = "cacao"
    with pytest.raises(CalledProcessError):
        b.sanity_check()
    # a real assertion (instead of `assert 1/0, ...`, which dies with ZeroDivisionError
    # before the message is ever used) so that the explanation is shown on failure
    assert any("-h, --help" in r.message for r in caplog.records), \
        "it seems that buildah_from_extra_args were not passed to sanity check"
def test_file_caching_mechanism(tmpdir, application, build):
    """ make sure that we don't load from cache when a file was changed """
    workdir = str(tmpdir)
    playbook_name = "file_caching.yaml"
    fun_file_name = "a_bag_of_fun"
    playbook_copy = os.path.join(workdir, playbook_name)
    fun_file_copy = os.path.join(workdir, fun_file_name)

    shutil.copy(os.path.join(data_dir, playbook_name), playbook_copy)
    shutil.copy(os.path.join(data_dir, fun_file_name), fun_file_copy)

    # make the copy task of the first play use our private copy of the file
    with open(playbook_copy) as fd:
        plays = yaml.safe_load(fd)
    plays[0]["tasks"][0]["copy"]["src"] = fun_file_copy
    with open(playbook_copy, "w") as fd:
        yaml.safe_dump(plays, fd)

    build.playbook_path = playbook_copy
    second_build = Build.from_json(build.to_dict())
    cached_build = Build.from_json(build.to_dict())

    application.build(build)
    build = application.db.get_build(build.build_id)
    assert len(build.layers) == 2
    assert [bool(layer.cached) for layer in build.layers] == [True, False]

    # ideally this would be cached, but isn't now
    application.build(cached_build)
    cached_build = application.db.get_build(cached_build.build_id)
    assert len(cached_build.layers) == 2
    # since ansible doesn't track files and whether they changed,
    # let's just make sure it works we expect it to work
    assert [bool(layer.cached) for layer in cached_build.layers] == [True, False]

    # and now we test that if we change the file, it's not loaded from cache
    fun_content = "Much more fun, fun, fun!"
    with open(fun_file_copy, "w") as fd:
        fd.write(fun_content)

    application.build(second_build)
    second_build = application.db.get_build(second_build.build_id)
    assert not second_build.layers[1].cached

    builder = application.get_builder(second_build)
    assert builder.run(second_build.target_image, ["cat", "/fun"]) == fun_content
def test_get_version_rhel_8():
    """
    With run_cmd replaced to return BUILDAH_15_VERSION, the parsed buildah version
    has to contain an int and compare lower than (1, 7, 3).
    """
    flexmock(buildah_builder, run_cmd=lambda *args, **kwargs: BUILDAH_15_VERSION)
    builder = BuildahBuilder(Build(), debug=True)
    version = builder.get_buildah_version()
    assert any(isinstance(part, int) for part in version)
    assert version < (1, 7, 3)
def test_no_cache_tag(application, build):
    """ utilize a playbook which halts caching """
    dont_cache_b = Build.from_json(build.to_dict())

    # first build: only the first recorded layer is expected to be cached
    build.playbook_path = dont_cache_playbook_path_pre
    application.build(build)
    build = application.db.get_build(build.build_id)
    assert len(build.layers) == 4
    assert [bool(layer.cached) for layer in build.layers] == [True, False, False, False]

    # second build with the other playbook: the first two layers are expected
    # to be cached and the remaining ones not
    dont_cache_b.target_image += "2"
    dont_cache_b.playbook_path = dont_cache_playbook_path
    application.build(dont_cache_b)
    dont_cache_b = application.db.get_build(dont_cache_b.build_id)
    assert len(dont_cache_b.layers) == 4
    assert [bool(layer.cached) for layer in dont_cache_b.layers] == [True, True, False, False]

    builder = application.get_builder(dont_cache_b)
    builder.run(dont_cache_b.target_image, ["ls", "-1", "/asd"])
def load_builds(self):
    """
    provide a list of all available builds

    The data is read while self.acquire() is held.

    :return: a list of Build instances
    """
    with self.acquire():
        stored_builds = self._load()["builds"]
        return list(map(Build.from_json, stored_builds.values()))
def test_dir_caching(tmpdir, application, build):
    """ make sure that we don't load from cache when files within multi level directories change """
    workdir = str(tmpdir)
    playbook_name = "file_caching.yaml"
    fun_dir_name = "a_directory_of_fun"
    playbook_copy = os.path.join(workdir, playbook_name)
    fun_dir_copy = os.path.join(workdir, fun_dir_name)
    fun_subfile = os.path.join(fun_dir_copy, "fun_subdir", "fun_subfile")

    shutil.copy(os.path.join(data_dir, playbook_name), playbook_copy)
    shutil.copytree(os.path.join(data_dir, fun_dir_name), fun_dir_copy)

    # make the copy task of the first play use our private copy of the directory
    with open(playbook_copy) as fd:
        plays = yaml.safe_load(fd)
    plays[0]["tasks"][0]["copy"]["src"] = fun_dir_copy
    with open(playbook_copy, "w") as fd:
        yaml.safe_dump(plays, fd)

    build.playbook_path = playbook_copy
    second_build = Build.from_json(build.to_dict())
    cached_build = Build.from_json(build.to_dict())

    application.build(build)
    build = application.db.get_build(build.build_id)
    assert len(build.layers) == 2
    assert [bool(layer.cached) for layer in build.layers] == [True, False]

    # nothing changed in between, the second run is expected to be fully cached
    application.build(cached_build)
    cached_build = application.db.get_build(cached_build.build_id)
    assert len(cached_build.layers) == 2
    assert [bool(layer.cached) for layer in cached_build.layers] == [True, True]

    # and now we test that if we change a subfile, it's not loaded from cache
    fun_content = "Much more fun, fun, fun!"
    with open(fun_subfile, "w") as fd:
        fd.write(fun_content)

    application.build(second_build)
    second_build = application.db.get_build(second_build.build_id)
    assert not second_build.layers[1].cached
def test_caching_non_ex_image(tmpdir, application, build):
    """
    scenario: we perform a build, we remove an image from cache, we perform the build again, ab should recover
    """
    def rmi(image):
        # best-effort image removal: failures are ignored, output is printed
        run_cmd(["buildah", "rmi", image], ignore_status=True, print_output=True)

    playbook_copy = os.path.join(str(tmpdir), os.path.basename(non_ex_pb))
    shutil.copy(non_ex_pb, playbook_copy)

    # randomize the debug message of the first task
    with open(playbook_copy) as fd:
        plays = yaml.safe_load(fd)
    plays[0]["tasks"][0]["debug"]["msg"] = f"Hello {random_str()}"
    with open(playbook_copy, "w") as fd:
        yaml.safe_dump(plays, fd)

    image_name = random_str(5)
    build.playbook_path = playbook_copy
    build.target_image = image_name

    application.build(build)
    build = application.db.get_build(build.build_id)

    # for debugging
    layers = build.layers
    final_layer_id = build.final_layer_id
    import subprocess
    subprocess.call(["podman", "images", "--all"])
    subprocess.call(["podman", "inspect", build.target_image])

    # FIXME: this command fails in CI, which is super weird
    rmi(build.target_image)
    rmi(build.final_layer_id)

    # now remove all images from the cache
    # (everything but the first layer, going from the last one back)
    for layer in reversed(build.layers[1:]):
        if layer.base_image_id:
            rmi(layer.layer_id)

    second_build = Build.from_json(build.to_dict())
    second_build.build_id = "33"
    application.build(second_build)
    rmi(build.target_image)
def test_caching(application, build):
    """
    Build the same definition twice under different target image names and compare
    the recorded layers: the first three layer IDs have to match and the fifth
    layer must not be marked as cached in either build.
    """
    def leading_layer_ids(a_build):
        return [layer.layer_id for layer in a_build.layers[:3]]

    b2 = Build.from_json(build.to_dict())
    application.build(build)

    b2.build_id = None
    b2.layers = []
    b2.target_image += "2"
    application.build(b2)

    build = application.db.get_build(build.build_id)
    b2 = application.db.get_build(b2.build_id)

    assert leading_layer_ids(b2) == leading_layer_ids(build)
    assert not b2.layers[4].cached
    assert not build.layers[4].cached
    assert len(build.layers) == 5
def _load_build(data, build_id, is_latest=False):
    """
    load selected build from database

    :param data: dict, builds are looked up under data["builds"]
    :param build_id: str or None
    :param is_latest: bool, selects the wording of the error message
    :return: build
    :raises RuntimeError: when a KeyError occurs while loading the build
    """
    try:
        return Build.from_json(data["builds"][build_id])
    except KeyError:
        message = (
            "Latest build with ID %s is no longer available, probably got cleaned."
            if is_latest
            else "There is no such build with ID %s"
        )
        raise RuntimeError(message % build_id)
def test_caching_mechanism(application, build):
    """ check that previously executed tasks are being loaded from cache and new ones are computed from scratch """
    small_build = Build.from_json(build.to_dict())
    small_build.target_image += "2"
    small_build.playbook_path = small_basic_playbook_path

    application.build(small_build)
    small_build = application.db.get_build(small_build.build_id)
    assert len(small_build.layers) == 2
    assert [bool(layer.cached) for layer in small_build.layers] == [True, False]

    # the build with the full playbook runs second: its first two layers are
    # expected to come from the cache, the other three not
    application.build(build)
    build = application.db.get_build(build.build_id)
    assert len(build.layers) == 5
    assert [bool(layer.cached) for layer in build.layers] == [True, True, False, False, False]
def build_inside_openshift(app):
    """
    This is expected to run inside an openshift pod spawned via custom build

    The playbook path and base image come from okd_get_playbook_base(), the git
    repository, ref and target image from okd_load_metadata(). The repository is
    cloned into a temporary directory which is removed when the build ends,
    successfully or not.

    :param app: instance of Application
    :raises RuntimeError: when the playbook path is absolute or resolves outside of the cloned repository
    """
    playbook_path, base_image = okd_get_playbook_base()

    if os.path.isabs(playbook_path):
        raise RuntimeError(
            "The path to playbook needs to be relative within the git repo.")

    uri, ref, target_image = okd_load_metadata()

    tmp = tempfile.mkdtemp(prefix="ab-okd")
    try:
        git_clone_to_path(uri, tmp, ref=ref)

        repo_root = os.path.abspath(tmp)
        playbook_path = os.path.abspath(os.path.join(repo_root, playbook_path))
        # Compare whole path components: a plain string prefix test would also accept
        # a sibling directory whose name merely starts with the name of the clone
        # (e.g. "../<clone-name>-x/playbook.yaml").
        # NOTE(review): symlinks stored in the repository are not resolved here.
        if os.path.commonpath([repo_root, playbook_path]) != repo_root:
            raise RuntimeError(
                "The path to playbook points outside of the git repo, this is not allowed."
            )

        build = Build()
        build.metadata = ImageMetadata()  # TODO: needs to be figured out
        build.playbook_path = playbook_path
        build.base_image = base_image
        build.target_image = target_image
        build.builder_name = "buildah"
        build.cache_tasks = False  # we have local storage in pod, so this doesn't make any sense
        app.build(build)
    finally:
        shutil.rmtree(tmp)
class PbVarsParser:
    """
    Load the ``ansible_bender`` variables of a playbook and apply them to
    a Build and an ImageMetadata object.
    """

    def __init__(self, playbook_path):
        """
        :param playbook_path: str, path to playbook
        """
        self.playbook_path = playbook_path
        self.build = Build()
        self.metadata = ImageMetadata()
        self.build.metadata = self.metadata

    def expand_pb_vars(self):
        """
        populate vars from a playbook, defined in vars section

        The variables of the first play are expanded by ansible itself: a helper
        playbook is generated into a temporary directory, symlinked next to the
        original playbook, executed against localhost and its JSON output is read back.
        The symlink and the temporary directory are removed afterwards, also on failure.

        :return: dict with the content of ansible_bender var, {} when there is none
        :raises RuntimeError: when the playbook does not contain any play
        """
        with open(self.playbook_path) as fd:
            plays = yaml.safe_load(fd)

        # an empty file is loaded as None and a mapping is not a list of plays
        if not isinstance(plays, list) or not plays:
            raise RuntimeError(
                "Invalid playbook, can't access the first document.")

        for play in plays[1:]:
            if graceful_get(play, "vars", "ansible_bender"):
                logger.warning(
                    "Variables are loaded only from the first play.")

        # we care only about the first play, we don't want to merge dicts
        d = plays[0]
        bender_vars = graceful_get(d, "vars", "ansible_bender")
        if not bender_vars:
            logger.info("no bender data found in the playbook")
            return {}

        tmp = tempfile.mkdtemp(prefix="ab")
        # set only once the symlink really exists, so that the cleanup never
        # tries to remove a link which was not created
        symlink_path = None
        try:
            json_data_path = os.path.join(tmp, "j.json")

            # we cannot use "vars" variable because the variables are not expanded in there
            pb_vars = copy.deepcopy(d["vars"])
            while True:
                # just in case the variable is already defined
                timestamp = datetime.datetime.now().strftime(
                    TIMESTAMP_FORMAT_TOGETHER)
                ab_vars_key = f"ab_vars_{timestamp}"
                if ab_vars_key not in pb_vars:
                    logger.debug("ab vars key = %s", ab_vars_key)
                    pb_vars[ab_vars_key] = d["vars"]
                    break
            jinja_pb_vars_key = '{{ %s }}' % ab_vars_key
            pb = {
                "name": "Let Ansible expand variables",
                "hosts": "localhost",
                "vars": pb_vars,
                "vars_files": d.get("vars_files", []),
                "gather_facts": False,
                "tasks": [
                    {"debug": {"msg": jinja_pb_vars_key}},
                    {"copy": {"dest": json_data_path, "content": jinja_pb_vars_key}},
                ],
            }

            i_path = os.path.join(tmp, "i")
            with open(i_path, "w") as fd:
                fd.write("localhost ansible_connection=local")

            # json is easier to parse than yaml
            tmp_pb_path = os.path.join(tmp, "p.json")
            with open(tmp_pb_path, "w") as fd:
                json.dump([pb], fd)

            playbook_base = os.path.basename(self.playbook_path).split(".", 1)[0]
            symlink_name = f".{playbook_base}-{timestamp}-{random_str()}.yaml"
            playbook_dir = os.path.dirname(self.playbook_path)
            new_symlink = os.path.join(playbook_dir, symlink_name)
            os.symlink(tmp_pb_path, new_symlink)
            symlink_path = new_symlink

            # yeah, ansible is not very smart for connection=local
            args = ["-e", f"ansible_python_interpreter={sys.executable}"]

            run_playbook(symlink_path, i_path, None, connection="local", try_unshare=False,
                         provide_output=False, log_stderr=True, ansible_args=args)

            with open(json_data_path) as fd:
                return json.load(fd)["ansible_bender"]
        finally:
            # the temporary directory has to go away even if removing the symlink fails
            try:
                if symlink_path is not None:
                    os.unlink(symlink_path)
            finally:
                shutil.rmtree(tmp)

    def process_pb_vars(self, bender_data):
        """
        accept variables from the playbook and update the Build and ImageMetadata objects with them

        :param bender_data: dict with the content of ansible_bender vars; nothing happens when it is empty
        :return: None
        """
        if not bender_data:
            return
        self.metadata.update_from_configuration(
            bender_data.get("target_image", {}))
        self.build.update_from_configuration(bender_data)

    def get_build_and_metadata(self):
        """
        extra vars from the selected playbook

        :return: Build(), ImageMetadata()
        """
        bender_data = self.expand_pb_vars()
        self.process_pb_vars(bender_data)
        return self.build, self.metadata
def _load_build(data, build_id):
    """
    Load the build stored under data["builds"][build_id].

    :param data: dict with a "builds" mapping
    :param build_id: str, ID of the build
    :return: build
    :raises RuntimeError: when a KeyError occurs while loading the build
    """
    try:
        stored_builds = data["builds"]
        build = Build.from_json(stored_builds[build_id])
    except KeyError:
        raise RuntimeError("There is no such build with ID %s" % build_id)
    return build
def build(self, build: Build):
    """
    build container image

    The build is recorded in the database right away and its state is updated as
    the build progresses (IN_PROGRESS, then FAILED or DONE). When the ansible run
    fails with ABBuildUnsuccesful, the container is committed as
    "<target_image>-<timestamp>-failed" and the exception is re-raised.
    builder.clean() is called once the ansible run was attempted, no matter
    how it ended.

    :param build: instance of Build
    :raises RuntimeError: when build.playbook_path is not an existing file
    """
    if not os.path.isfile(build.playbook_path):
        raise RuntimeError("No such file or directory: %s" % build.playbook_path)

    build.validate()
    build.metadata.validate()

    build.debug = self.debug
    build.verbose = self.verbose

    # we have to record as soon as possible
    self.db.record_build(build)

    # preparation phase: any failure in here marks the build as FAILED and is re-raised
    try:
        builder = self.get_builder(build)
        builder.sanity_check()

        # before we start messing with the base image, we need to check for its presence first
        if not builder.is_base_image_present():
            builder.pull()
            build.pulled = True

        builder.check_container_creation()

        # let's record base image as a first layer
        base_image_id = builder.get_image_id(build.base_image)
        build.record_layer(None, base_image_id, None, cached=True)

        a_runner = AnsibleRunner(build.playbook_path, builder, build, debug=self.debug)

        # we are about to perform the build
        build.build_start_time = datetime.datetime.now()
        self.db.record_build(build, build_state=BuildState.IN_PROGRESS)

        # prefer an explicitly set interpreter, then the one stored in the database for
        # this base image; look it up in the image only as the last resort
        build.python_interpreter = build.python_interpreter or self.db.load_python_interpreter(base_image_id)
        if not build.python_interpreter:
            build.python_interpreter = builder.find_python_interpreter()
            self.db.record_python_interpreter(base_image_id, build.python_interpreter)

        builder.create()
    except Exception:
        self.db.record_build(
            None,
            build_id=build.build_id,
            build_state=BuildState.FAILED,
            set_finish_time=True
        )
        raise

    # build phase: builder.clean() in the finally clause runs for both outcomes
    try:
        try:
            output = a_runner.build(self.db_path)
        except ABBuildUnsuccesful as ex:
            # record_build returns the build object which is updated below
            b = self.db.record_build(None, build_id=build.build_id,
                                     build_state=BuildState.FAILED,
                                     set_finish_time=True)
            b.log_lines = ex.output.split("\n")
            self.db.record_build(b)
            # keep what was built so far under a name marking the failure
            timestamp = datetime.datetime.now().strftime(TIMESTAMP_FORMAT)
            image_name = build.target_image + "-" + timestamp + "-failed"
            b.target_image = image_name
            image_id = builder.commit(image_name)
            b.final_layer_id = image_id
            self.record_progress(b, None, image_id)
            out_logger.info("Image build failed /o\\")
            out_logger.info("The progress is saved into image '%s'", image_name)
            raise

        b = self.db.record_build(None, build_id=build.build_id, build_state=BuildState.DONE,
                                 set_finish_time=True)
        b.log_lines = output
        # commit the final image and apply all metadata
        b.final_layer_id = builder.commit(build.target_image, final_image=True)

        if b.squash:
            logger.debug("Squashing metadata into a single layer")
            # reset layers if squashing
            b.wipe_layers()
            self.record_progress(b, None, b.final_layer_id)
        # without layering the final image is recorded as a layer,
        # otherwise the build is only stored
        if not b.is_layering_on():
            self.record_progress(b, None, b.final_layer_id)
        else:
            self.db.record_build(b)

        out_logger.info("Image '%s' was built successfully \\o/", build.target_image)
    finally:
        builder.clean()
class PbVarsParser:
    """
    Load the ``ansible_bender`` variables of a playbook, validate them against
    PLAYBOOK_SCHEMA and apply them to a Build and an ImageMetadata object.
    """

    def __init__(self, playbook_path):
        """
        :param playbook_path: str, path to playbook
        """
        self.playbook_path = playbook_path
        self.build = Build()
        self.metadata = ImageMetadata()
        self.build.metadata = self.metadata

    def _check_selinux_iz_gud(self):
        """
        This is a workaround for a weird behavior of ansible: if selinux is
        in the enforcing mode and python3-libselinux is not installed, ansible freezes

        https://bugzilla.redhat.com/show_bug.cgi?id=1696706

        :return: None
        :raises RuntimeError: when /sys/fs/selinux/enforce exists and the selinux python module can't be imported
        """
        try:
            enforcing_status = Path("/sys/fs/selinux/enforce").read_text()
        except FileNotFoundError:
            logger.debug(
                "this system is not using selinux, /sys/fs/selinux/enforce is not present"
            )
            return
        logger.debug("selinux enforce status = %s", enforcing_status)
        # it can be enforcing or not, selinux python module needs to be present
        try:
            importlib.import_module("selinux")
        except ModuleNotFoundError:
            raise RuntimeError(
                "\nThis system is using selinux(8) and selinux python module is not installed. "
                "There is a known issue in ansible that it freezes in this setup:\n"
                "  https://bugzilla.redhat.com/show_bug.cgi?id=1696706\n"
                "Please install libselinux python bindings (on Fedora the package name is python3-libselinux)."
            )

    def expand_pb_vars(self):
        """
        populate vars from a playbook, defined in vars section

        The variables of the first play are expanded by ansible itself: a helper
        playbook is generated into a temporary directory, symlinked next to the
        original playbook, executed against localhost and its JSON output is read back.
        The symlink and the temporary directory are removed afterwards, also on failure.

        :return: dict with the content of ansible_bender var, {} when there is none
        :raises RuntimeError: when the playbook does not contain any play or the selinux check fails
        """
        self._check_selinux_iz_gud()
        with open(self.playbook_path) as fd:
            plays = yaml.safe_load(fd)

        # an empty file is loaded as None and a mapping is not a list of plays
        if not isinstance(plays, list) or not plays:
            raise RuntimeError(
                "Invalid playbook, can't access the first document.")

        for play in plays[1:]:
            if graceful_get(play, "vars", "ansible_bender"):
                logger.warning(
                    "Variables are loaded only from the first play.")

        # we care only about the first play, we don't want to merge dicts
        d = plays[0]
        bender_vars = graceful_get(d, "vars", "ansible_bender")
        if not bender_vars:
            logger.info("no bender data found in the playbook")
            return {}

        tmp = tempfile.mkdtemp(prefix="ab")
        # set only once the symlink really exists, so that the cleanup never
        # tries to remove a link which was not created
        symlink_path = None
        try:
            json_data_path = os.path.join(tmp, "j.json")

            # we cannot use "vars" variable because the variables are not expanded in there
            pb_vars = copy.deepcopy(d["vars"])
            while True:
                # just in case the variable is already defined
                timestamp = datetime.datetime.now().strftime(
                    TIMESTAMP_FORMAT_TOGETHER)
                ab_vars_key = f"ab_vars_{timestamp}"
                if ab_vars_key not in pb_vars:
                    logger.debug("ab vars key = %s", ab_vars_key)
                    pb_vars[ab_vars_key] = d["vars"]
                    break
            jinja_pb_vars_key = '{{ %s }}' % ab_vars_key
            pb = {
                "name": "Let Ansible expand variables",
                "hosts": "localhost",
                "vars": pb_vars,
                "vars_files": d.get("vars_files", []),
                "gather_facts": False,
                "tasks": [
                    {"debug": {"msg": jinja_pb_vars_key}},
                    {"copy": {"dest": json_data_path, "content": jinja_pb_vars_key}},
                ],
            }

            i_path = os.path.join(tmp, "i")
            with open(i_path, "w") as fd:
                fd.write("localhost ansible_connection=local")

            # json is easier to parse than yaml
            tmp_pb_path = os.path.join(tmp, "p.json")
            with open(tmp_pb_path, "w") as fd:
                json.dump([pb], fd)

            playbook_base = os.path.basename(self.playbook_path).split(".", 1)[0]
            symlink_name = f".{playbook_base}-{timestamp}-{random_str()}.yaml"
            playbook_dir = os.path.dirname(self.playbook_path)
            new_symlink = os.path.join(playbook_dir, symlink_name)
            os.symlink(tmp_pb_path, new_symlink)
            symlink_path = new_symlink

            # yeah, ansible is not very smart for connection=local
            args = ["-e", f"ansible_python_interpreter={sys.executable}"]

            run_playbook(symlink_path, i_path, None, connection="local", try_unshare=False,
                         provide_output=False, log_stderr=True, ansible_args=args)

            with open(json_data_path) as fd:
                return json.load(fd)["ansible_bender"]
        finally:
            # the temporary directory has to go away even if removing the symlink fails
            try:
                if symlink_path is not None:
                    os.unlink(symlink_path)
            finally:
                shutil.rmtree(tmp)

    def process_pb_vars(self, bender_data):
        """
        accept variables from the playbook and update the Build and ImageMetadata objects with them

        :param bender_data: dict with the content of ansible_bender vars; nothing happens when it is empty
        :return: None
        :raises ABValidationError: when bender_data does not conform to PLAYBOOK_SCHEMA
        """
        if not bender_data:
            return
        try:
            # validation to error out unknown keys in /vars/ansible_bender
            jsonschema.validate(bender_data, PLAYBOOK_SCHEMA)
        except jsonschema.ValidationError as validation_error:
            if validation_error.validator == "type":
                # error is due to invalid value datatype
                # path items are ints for list indices, str.join accepts only strings
                path = "/" + "/".join(str(item) for item in validation_error.path)
                expected_types = validation_error.validator_value
                if isinstance(expected_types, list):
                    expected_types = ", ".join(expected_types)
                message = f"variable {path} is set to {validation_error.instance}" \
                          f", which is not of type {expected_types}"
                raise ABValidationError(message) from validation_error
            # error is due to absence of a required key, unknown keys playbook or any other kind
            raise ABValidationError(
                validation_error.message) from validation_error

        self.metadata.update_from_configuration(
            bender_data.get("target_image", {}))
        self.build.update_from_configuration(bender_data)

    def get_build_and_metadata(self):
        """
        extra vars from the selected playbook

        :return: Build(), ImageMetadata()
        """
        bender_data = self.expand_pb_vars()
        self.process_pb_vars(bender_data)
        return self.build, self.metadata
def test_get_version():
    """ the buildah version reported by the builder has to contain at least one int """
    builder = BuildahBuilder(Build(), debug=True)
    version = builder.get_buildah_version()
    assert any(isinstance(part, int) for part in version)