def read(cls):
    """Read one size-prefixed record from stdin and deserialize it as `cls`.

    The record starts with a 4-byte header unpacked with struct format
    ``'I'`` (the payload size), followed by that many bytes of payload.

    Args:
        cls: the message class handed to `deserialize`.

    Returns:
        Whatever `deserialize(cls, data)` returns for the payload.

    Raises:
        Err: if the declared size is zero, or if fewer bytes than declared
            could be read from stdin.
    """
    (size,) = struct.unpack('I', sys.stdin.read(4))
    if size <= 0:
        raise Err("Expected non-zero size for Protobuf")
    data = sys.stdin.read(size)
    if len(data) != size:
        # Interpolate here: unlike logging calls, an exception constructor
        # does not apply %-formatting to extra positional arguments.
        raise Err("Expected %d bytes; received %d" % (size, len(data)))
    return deserialize(cls, data)
def __call__(self, *args):
    """Dispatch to the containerizer subcommand named by the first argument.

    Args:
        *args: the subcommand name followed by that subcommand's arguments.

    Returns:
        The return value of the selected subcommand method, which is
        called with every argument after the name.

    Raises:
        Err: if no subcommand was given, or the name is not one of
            launch / update / usage / wait / destroy.
    """
    try:
        name = args[0]
    except IndexError:
        raise Err("Please choose a subcommand")
    subcommands = {
        "launch": self.launch,
        "update": self.update,
        "usage": self.usage,
        "wait": self.wait,
        "destroy": self.destroy,
    }
    try:
        method = subcommands[name]
    except KeyError:
        raise Err("Subcommand %s is not valid for containerizers" % name)
    remaining = args[1:]
    return method(*remaining)
def observe(self, *args):
    """Wait for the container named by `args[0]` and return its exit value.

    Args:
        *args: command-line words; the first one is used as the Mesos
            container ID when building the state object.

    Returns:
        The value of `state.exit()` once it is no longer None.

    Raises:
        Err: if, after the wait lock was obtained, no exit value is present.
        IOError: if taking the lock fails for a reason other than EINTR.
    """
    log.info(" ".join(args))
    container_state = deimos.state.State(self.state_root, mesos_id=args[0])
    self.state = container_state
    deimos.sig.install(self.stop_docker_and_resume)
    container_state.await_launch()
    try:
        # Take the wait lock to block calls to wait()
        container_state.lock("wait", LOCK_SH, seconds=None)
    except IOError as interrupted:
        # Allows for signal recovery: an interrupted lock attempt is
        # retried once with a one-second limit; anything else propagates.
        if interrupted.errno == errno.EINTR:
            container_state.lock("wait", LOCK_SH, seconds=1)
        else:
            raise interrupted
    if container_state.exit() is None:
        raise Err("Wait lock is not held nor is exit file present")
    return container_state.exit()
def wait(self, *args):
    """In ``--docker`` mode, wait for a container and return its exit value.

    Args:
        *args: command-line words. Nothing is done unless the first word is
            ``--docker``; the second word is then used as the Docker
            container ID.

    Returns:
        None when not in ``--docker`` mode; otherwise the value of
        `state.exit()` once it is no longer None.

    Raises:
        Err: if, after the wait lock was obtained, no exit value is present.
        IOError: if taking the lock fails for a reason other than EINTR.
    """
    log.info(" ".join(args))
    # We rely on the Mesos default wait strategy in general
    if args[:1] != ("--docker",):
        return
    # In Docker mode, we use Docker wait to wait for the container and
    # then exit with the returned exit code. The passed in ID should be a
    # Docker CID, not a Mesos container ID.
    docker_state = deimos.state.State(self.state_root, docker_id=args[1])
    self.state = docker_state
    deimos.sig.install(self.stop_docker_and_resume)
    docker_state.await_launch()
    try:
        docker_state.lock("wait", LOCK_SH, seconds=None)
    except IOError as interrupted:
        # Allows for signal recovery
        if interrupted.errno == errno.EINTR:
            docker_state.lock("wait", LOCK_SH, 1)
        else:
            raise interrupted
    if docker_state.exit() is None:
        raise Err("Wait lock is not held nor is exit file present")
    return docker_state.exit()
def wait(self, wait_pb, *args):
    """Wait for a container to finish and report its termination.

    Args:
        wait_pb: wait request; `wait_pb.container_id.value` is used as the
            Mesos container ID.
        *args: extra command-line words (only logged).

    Returns:
        The container's exit value as read from `state.exit()`.

    Raises:
        Err: if no exit value is present after the locks were obtained. A
            Termination record with status ``64 << 8`` has already been
            written in that case.
        IOError: if taking a lock fails for a reason other than EINTR.
    """
    log.info(" ".join(args))
    container_id = wait_pb.container_id.value
    state = deimos.state.State(self.state_root, mesos_id=container_id)
    self.state = state
    deimos.sig.install(self.stop_docker_and_resume)
    state.await_launch()
    try:
        # Wait for the observe lock so observe completes first
        state.lock("observe", LOCK_SH, seconds=None)
        state.lock("wait", LOCK_SH, seconds=None)
    except IOError as e:
        # Allows for signal recovery
        if e.errno != errno.EINTR:
            raise  # bare raise keeps the original traceback
        state.lock("observe", LOCK_SH, seconds=1)
        state.lock("wait", LOCK_SH, seconds=1)
    # Read the exit value once so the status written below and the value
    # returned to the caller can never disagree.
    exit_code = state.exit()
    # The exit value is shifted into the high byte of the status word; 64 is
    # the placeholder used when no exit value is available.
    termination = (exit_code if exit_code is not None else 64) << 8
    recordio.write(Termination,
                   killed=False,
                   message="",
                   status=termination)
    if exit_code is not None:
        return exit_code
    raise Err("Wait lock is not held nor is exit file present")
def url_to_image(url):
    """Strip the ``docker://`` (or ``docker:///``) prefix from a URL.

    Args:
        url: container URL string.

    Returns:
        Everything after the prefix; this may be the empty string.

    Raises:
        Err: if `url` does not start with ``docker://``.
    """
    # re.split() returns a single-element list when the pattern does not
    # match, which made tuple unpacking fail with ValueError before the
    # validity check could run. Matching explicitly keeps the check reachable.
    prefix = re.match(r"docker:///?", url)
    if prefix is None:
        raise Err("URL '%s' is not a valid docker:// URL!" % url)
    return url[prefix.end():]
def launch(self, launch_pb, *args):
    """Start the task's Docker container and supervise it until it exits.

    Args:
        launch_pb: launch request; wrapped in `deimos.mesos.Launch`.
        *args: extra command-line words. ``--no-fork`` keeps supervision in
            the calling process instead of forking a watcher child.

    Returns:
        None in the parent process when a fork takes place; otherwise the
        value of `state.exit()` after the container has finished.

    Raises:
        Err: if the container URL does not start with ``docker://``.
    """
    log.info(" ".join(args))
    fork = "--no-fork" not in args
    deimos.sig.install(self.log_signal)
    run_options = []
    launchy = deimos.mesos.Launch(launch_pb)
    state = deimos.state.State(self.state_root,
                               mesos_id=launchy.container_id)
    state.push()
    lk_l = state.lock("launch", LOCK_EX)
    state.executor_id = launchy.executor_id
    state.push()
    state.ids()
    mesos_directory()  # Redundant?
    if launchy.directory:
        os.chdir(launchy.directory)
    # TODO: if launchy.user:
    #           os.seteuid(launchy.user)
    url, options = self.container_settings.override(*launchy.container)
    # Match the scheme explicitly: re.split() yields a single element for a
    # non-matching URL, so unpacking it raised ValueError and the intended
    # Err below was unreachable.
    scheme = re.match(r"docker:///?", url)
    if scheme is None:
        raise Err("URL '%s' is not a valid docker:// URL!" % url)
    image = url[scheme.end():]
    if image == "":
        image = self.default_image(launchy)
    log.info("image = %s", image)
    run_options += ["--sig-proxy"]
    run_options += ["--rm"]  # This is how we ensure container cleanup
    run_options += ["--cidfile", state.resolve("cid")]

    place_uris(launchy, self.shared_dir, self.optimistic_unpack)
    run_options += ["-w", self.workdir]

    # Docker requires an absolute path to a source filesystem, separated
    # from the bind path in the container with a colon, but the absolute
    # path to the Mesos sandbox might have colons in it (TaskIDs with
    # timestamps can cause this situation). So we create a soft link to it
    # and mount that.
    shared_full = os.path.abspath(self.shared_dir)
    sandbox_symlink = state.sandbox_symlink(shared_full)
    run_options += ["-v", "%s:%s" % (sandbox_symlink, self.workdir)]

    cpus, mems = launchy.cpu_and_mem
    env = launchy.env
    run_options += options

    # We need to wrap the call to Docker in a call to the Mesos executor
    # if no executor is passed as part of the task. We need to pass the
    # MESOS_* environment variables in to the container if we're going to
    # start an executor.
    observer_argv = None
    if launchy.needs_observer:
        # NB: The "@@docker@@" variant is a work around for Mesos's option
        # parser. There is a fix in the pipeline.
        observer_argv = [mesos_executor(), "--override",
                         deimos.path.me(), "observe", state.mesos_id]
        state.lock("observe", LOCK_EX | LOCK_NB)  # Explanation of Locks
        # When the observer is running, we would like its call to
        # observe() to finish before all the wait(); and we'd like the
        # observer to have a chance to report TASK_FINISHED before the
        # calls to wait() report their results (which would result in a
        # TASK_FAILED).
        #
        # For this reason, we take the "observe" lock in launch(), before
        # we call the observer and before releasing the "launch" or "wait"
        # locks.
        #
        # Calls to observe() actually skip locking "observe"; but wait()
        # calls must take this lock. The "observe" lock is held by
        # launch() until the observer executor completes, at which point
        # we can be reasonably sure its status was propagated to the Mesos
        # slave.
    else:
        env += mesos_env() + [("MESOS_DIRECTORY", self.workdir)]

    runner_argv = deimos.docker.run(run_options, image, launchy.argv,
                                    env=env, ports=launchy.ports,
                                    cpus=cpus, mems=mems)

    log_mesos_env(logging.DEBUG)

    observer = None
    with open("stdout", "w") as o:        # This awkward multi 'with' is a
        with open("stderr", "w") as e:    # concession to 2.6 compatibility
            with open(os.devnull) as devnull:
                log.info(deimos.cmd.present(runner_argv))
                self.runner = subprocess.Popen(runner_argv, stdin=devnull,
                                               stdout=o,
                                               stderr=e)
                state.pid(self.runner.pid)
                state.await_cid()
                state.push()
                # Take "wait" before releasing "launch" so there is no gap
                # in which neither lock is held.
                lk_w = state.lock("wait", LOCK_EX)
                lk_l.unlock()
                if fork:
                    pid = os.fork()
                    # Compare by value; identity tests on ints rely on an
                    # interpreter implementation detail.
                    if pid != 0:
                        state.ids()
                        log.info("Forking watcher into child...")
                        return
                state.ids()
                if observer_argv is not None:
                    log.info(deimos.cmd.present(observer_argv))
                    call = deimos.cmd.in_sh(observer_argv, allstderr=False)
                    # The observer gets its own copies of these files when
                    # it is spawned, so this process closes its handles as
                    # soon as Popen returns instead of leaking them.
                    with open(state.resolve("observer.out"), "w+") as obs_out:
                        with open(state.resolve("observer.err"),
                                  "w+") as obs_err:
                            # If the Mesos executor sees LIBPROCESS_PORT=0
                            # (which is passed by the slave) there are
                            # problems when it attempts to bind. ("Address
                            # already in use"). Purging both LIBPROCESS_*
                            # net variables, to be safe.
                            for v in ["LIBPROCESS_PORT", "LIBPROCESS_IP"]:
                                if v in os.environ:
                                    del os.environ[v]
                            observer = subprocess.Popen(call, stdin=devnull,
                                                        stdout=obs_out,
                                                        stderr=obs_err,
                                                        close_fds=True)
    data = Run(data=True)(deimos.docker.wait(state.cid()))
    state.exit(data)
    lk_w.unlock()
    for p, arr in [(self.runner, runner_argv), (observer, observer_argv)]:
        if p is None:
            continue
        # Give each child 10s to exit on its own, then SIGTERM, then after
        # one more second SIGKILL.
        thread = threading.Thread(target=p.wait)
        thread.start()
        thread.join(10)
        if thread.is_alive():
            log.warning(deimos.cmd.present(arr, "SIGTERM after 10s"))
            p.terminate()
        thread.join(1)
        if thread.is_alive():
            log.warning(deimos.cmd.present(arr, "SIGKILL after 1s"))
            p.kill()
        code = p.wait()
        msg = deimos.cmd.present(arr, code)
        if code == 0:
            log.info(msg)
        else:
            log.warning(msg)
    return state.exit()
def launch(self, container_id, *args):
    """Start a Docker container for the TaskInfo read from stdin.

    Args:
        container_id: Mesos container ID used to key the state directory.
        *args: extra command-line words. When the task needs an executor
            wrapper, the first word must be ``--mesos-executor`` or
            ``--observer`` and the following words form the observer
            command.

    Returns:
        The value of `state.exit()` after the container has finished.

    Raises:
        Err: if the container URL does not start with ``docker:///``, or if
            the task needs an observer and none was supplied.
    """
    log.info(" ".join([container_id] + list(args)))
    deimos.sig.install(self.sig_proxy)
    run_options = []
    state = deimos.state.State(self.state_root, mesos_id=container_id)
    state.push()
    lk_l = state.lock("launch", LOCK_EX)
    mesos_directory()
    task = protos.TaskInfo()
    task.ParseFromString(sys.stdin.read())
    for line in proto_lines(task):
        log.debug(line)
    state.executor_id = executor_id(task)
    state.push()
    state.ids()
    url, options = self.container_settings.override(*container(task))
    # partition() always yields three parts, so a URL lacking the marker
    # reaches the Err below instead of failing tuple unpacking with
    # ValueError as str.split() did.
    pre, marker, image = url.partition("docker:///")
    if marker == "" or pre != "":
        raise Err("URL '%s' is not a valid docker:// URL!" % url)
    if image == "":
        image = self.default_image(task)
    log.info("image = %s", image)
    run_options += ["--sig-proxy"]
    run_options += ["--rm"]  # This is how we ensure container cleanup
    run_options += ["--cidfile", state.resolve("cid")]

    place_uris(task, self.shared_dir, self.optimistic_unpack)
    run_options += ["-w", self.workdir]

    # Docker requires an absolute path to a source filesystem, separated
    # from the bind path in the container with a colon, but the absolute
    # path to the Mesos sandbox might have colons in it (TaskIDs with
    # timestamps can cause this situation). So we create a soft link to it
    # and mount that.
    shared_full = os.path.abspath(self.shared_dir)
    sandbox_symlink = state.sandbox_symlink(shared_full)
    run_options += ["-v", "%s:%s" % (sandbox_symlink, self.workdir)]

    cpus, mems = cpu_and_mem(task)
    env = [(_.name, _.value) for _ in task.command.environment.variables]
    run_options += options

    # We need to wrap the call to Docker in a call to the Mesos executor
    # if no executor is passed as part of the task. We need to pass the
    # MESOS_* environment variables in to the container if we're going to
    # start an executor.
    observer_argv = None
    if needs_executor_wrapper(task):
        observer_flags = ["--mesos-executor", "--observer"]
        if not (len(args) > 1 and args[0] in observer_flags):
            raise Err("Task %s needs --observer to be set!" % state.eid())
        observer_argv = list(args[1:]) + [deimos.path.me(),
                                          "wait", "--docker"]
    else:
        env += mesos_env() + [("MESOS_DIRECTORY", self.workdir)]

    runner_argv = deimos.docker.run(run_options, image, argv(task),
                                    env=env, ports=ports(task),
                                    cpus=cpus, mems=mems)

    log_mesos_env(logging.DEBUG)

    observer = None
    with open("stdout", "w") as o:        # This awkward multi 'with' is a
        with open("stderr", "w") as e:    # concession to 2.6 compatibility
            with open(os.devnull) as devnull:
                log.info(deimos.cmd.present(runner_argv))
                self.runner = subprocess.Popen(runner_argv, stdin=devnull,
                                               stdout=o,
                                               stderr=e)
                state.pid(self.runner.pid)
                state.await_cid()
                state.push()
                # Take "wait" before releasing "launch" so there is no gap
                # in which neither lock is held.
                lk_w = state.lock("wait", LOCK_EX)
                lk_l.unlock()
                state.ids()
                proto_out(protos.ExternalStatus, message="launch: ok")
                sys.stdout.close()  # Mark STDOUT as closed for Python code
                os.close(1)  # Use low-level call to close OS side of STDOUT
                if observer_argv is not None:
                    observer_argv += [state.cid()]
                    log.info(deimos.cmd.present(observer_argv))
                    call = deimos.cmd.in_sh(observer_argv)
                    # TODO: Collect these leaking file handles.
                    # NOTE(review): descriptor 1 was closed just above, so
                    # one of these files may now occupy it; confirm nothing
                    # depends on that before closing them here.
                    obs_out = open(state.resolve("observer.out"), "w+")
                    obs_err = open(state.resolve("observer.err"), "w+")
                    # If the Mesos executor sees LIBPROCESS_PORT=0 (which
                    # is passed by the slave) there are problems when it
                    # attempts to bind. ("Address already in use").
                    # Purging both LIBPROCESS_* net variables, to be safe.
                    for v in ["LIBPROCESS_PORT", "LIBPROCESS_IP"]:
                        if v in os.environ:
                            del os.environ[v]
                    observer = subprocess.Popen(call, stdin=devnull,
                                                stdout=obs_out,
                                                stderr=obs_err,
                                                close_fds=True)
    data = Run(data=True)(deimos.docker.wait(state.cid()))
    state.exit(data)
    lk_w.unlock()
    for p, arr in [(self.runner, runner_argv), (observer, observer_argv)]:
        if p is None:
            continue
        code = p.wait()
        # Only abnormal exits are worth a log line.
        if code != 0:
            log.warning(deimos.cmd.present(arr, code))
    return state.exit()