Exemplo n.º 1
0
 def observe(self, *args):
     """Wait on a launched container and return its recorded exit value.

     Logs the arguments, builds the state for the Mesos container ID given
     in ``args[0]``, installs ``self.stop_docker_and_resume`` through
     ``deimos.sig.install`` and waits for launch before taking a shared
     "wait" lock.

     Returns ``state.exit()`` when it is not None.

     Raises ``Err`` when no exit value is available after locking, and
     re-raises any ``IOError`` from locking whose errno is not ``EINTR``.
     """
     log.info(" ".join(args))
     st = deimos.state.State(self.state_root, mesos_id=args[0])
     self.state = st
     deimos.sig.install(self.stop_docker_and_resume)
     st.await_launch()
     # Take the wait lock to block calls to wait()
     try:
         st.lock("wait", LOCK_SH, seconds=None)
     except IOError as err:
         # Allows for signal recovery: an interrupted call is retried once,
         # this time with seconds=1; anything else propagates.
         if err.errno == errno.EINTR:
             st.lock("wait", LOCK_SH, seconds=1)
         else:
             raise err
     if st.exit() is None:
         raise Err("Wait lock is not held nor is exit file present")
     return st.exit()
Exemplo n.º 2
0
 def observe(self, *args):
     """Wait on a launched container and return its recorded exit value.

     Logs the arguments, builds the state for the Mesos container ID given
     in ``args[0]``, installs ``self.stop_docker_and_resume`` through
     ``deimos.sig.install`` and waits for launch before taking a shared
     "wait" lock.

     Returns ``state.exit()`` when it is not None.

     Raises ``Err`` when no exit value is available after locking, and
     re-raises any ``IOError`` from locking whose errno is not ``EINTR``.
     """
     log.info(" ".join(args))
     st = deimos.state.State(self.state_root, mesos_id=args[0])
     self.state = st
     deimos.sig.install(self.stop_docker_and_resume)
     st.await_launch()
     # Take the wait lock to block calls to wait()
     try:
         st.lock("wait", LOCK_SH, seconds=None)
     except IOError as err:
         # Allows for signal recovery: an interrupted call is retried once,
         # this time with seconds=1; anything else propagates.
         if err.errno == errno.EINTR:
             st.lock("wait", LOCK_SH, seconds=1)
         else:
             raise err
     if st.exit() is None:
         raise Err("Wait lock is not held nor is exit file present")
     return st.exit()
Exemplo n.º 3
0
 def containers(self, *args):
     """Write out the Mesos container IDs of containers being launched/run.

     Lists Docker container IDs with ``docker ps --no-trunc -q``.  For each
     ID whose state exists, a non-blocking shared "wait" lock is attempted;
     when that attempt raises ``deimos.flock.Err`` the container's Mesos ID
     is collected.  The collected IDs are written as a ``Containers``
     message with ``recordio.writeProto``.

     Always returns 0.
     """
     log.info(" ".join(args))
     listing = Run(data=True)(deimos.docker.docker("ps", "--no-trunc", "-q"))
     held_ids = []
     for docker_cid in (row.strip() for row in listing.splitlines()):
         st = deimos.state.State(self.state_root, docker_id=docker_cid)
         if st.exists():
             try:
                 st.lock("wait", LOCK_SH | LOCK_NB)
             except deimos.flock.Err:
                 # LOCK_EX held, so launch() is running
                 held_ids.append(st.mesos_container_id())
     message = Containers()
     for mesos_id in held_ids:
         message.containers.add().value = mesos_id
     recordio.writeProto(message)
     return 0
Exemplo n.º 4
0
 def containers(self, *args):
     """Write out the Mesos container IDs of containers being launched/run.

     Lists Docker container IDs with ``docker ps --no-trunc -q``.  For each
     ID whose state exists, a non-blocking shared "wait" lock is attempted;
     when that attempt raises ``deimos.flock.Err`` the container's Mesos ID
     is collected.  The collected IDs are written as a ``Containers``
     message with ``recordio.writeProto``.

     Always returns 0.
     """
     log.info(" ".join(args))
     listing = Run(data=True)(deimos.docker.docker("ps", "--no-trunc", "-q"))
     held_ids = []
     for docker_cid in (row.strip() for row in listing.splitlines()):
         st = deimos.state.State(self.state_root, docker_id=docker_cid)
         if st.exists():
             try:
                 st.lock("wait", LOCK_SH | LOCK_NB)
             except deimos.flock.Err:
                 # LOCK_EX held, so launch() is running
                 held_ids.append(st.mesos_container_id())
     message = Containers()
     for mesos_id in held_ids:
         message.containers.add().value = mesos_id
     recordio.writeProto(message)
     return 0
Exemplo n.º 5
0
 def destroy(self, destroy_pb, *args):
     """Stop the Docker container of a Mesos container unless it has exited.

     The Mesos container ID is read from ``destroy_pb.container_id.value``.
     After waiting for launch and taking an exclusive "destroy" lock,
     ``docker stop`` is run when ``state.exit()`` is None; otherwise the
     container is only reported as stopped in the log.

     Always returns 0.
     """
     log.info(" ".join(args))
     mesos_cid = destroy_pb.container_id.value
     st = deimos.state.State(self.state_root, mesos_id=mesos_cid)
     st.await_launch()
     # NOTE(review): the lock object is bound to a local; presumably that
     # keeps the lock held until this method returns -- confirm in
     # deimos.flock.
     destroy_lock = st.lock("destroy", LOCK_EX)
     if st.exit() is not None:
         log.info("Container is stopped")
     else:
         Run()(deimos.docker.stop(st.cid()))
     return 0
Exemplo n.º 6
0
 def wait(self, *args):
     """Wait for a Docker container when invoked with ``--docker <cid>``.

     Without a leading "--docker" argument nothing is done and None is
     returned.  Otherwise ``args[1]`` is used as the Docker container ID:
     the state is built, ``self.stop_docker_and_resume`` is installed via
     ``deimos.sig.install`` and a shared "wait" lock is taken after launch.

     Returns ``state.exit()`` when it is not None.

     Raises ``Err`` when no exit value is available after locking, and
     re-raises any ``IOError`` from locking whose errno is not ``EINTR``.
     """
     log.info(" ".join(list(args)))
     docker_mode = list(args[0:1]) == ["--docker"]
     if not docker_mode:
         # We rely on the Mesos default wait strategy in general
         return
     # In Docker mode, we use Docker wait to wait for the container and
     # then exit with the returned exit code. The passed in ID should be a
     # Docker CID, not a Mesos container ID.
     st = deimos.state.State(self.state_root, docker_id=args[1])
     self.state = st
     deimos.sig.install(self.stop_docker_and_resume)
     st.await_launch()
     try:
         st.lock("wait", LOCK_SH, seconds=None)
     except IOError as err:
         # Allows for signal recovery: retry once after an interrupted call.
         if err.errno == errno.EINTR:
             st.lock("wait", LOCK_SH, 1)
         else:
             raise err
     if st.exit() is None:
         raise Err("Wait lock is not held nor is exit file present")
     return st.exit()
Exemplo n.º 7
0
 def wait(self, *args):
     """Wait for a Docker container when invoked with ``--docker <cid>``.

     Without a leading "--docker" argument nothing is done and None is
     returned.  Otherwise ``args[1]`` is used as the Docker container ID:
     the state is built, ``self.signal_docker_and_resume`` is installed via
     ``deimos.sig.install`` and a shared "wait" lock is taken after launch.

     Returns ``state.exit()`` when it is not None.

     Raises ``Err`` when no exit value is available after locking, and
     re-raises any ``IOError`` from locking whose errno is not ``EINTR``.
     """
     log.info(" ".join(list(args)))
     docker_mode = list(args[0:1]) == ["--docker"]
     if not docker_mode:
         # We rely on the Mesos default wait strategy in general
         return
     # In Docker mode, we use Docker wait to wait for the container and
     # then exit with the returned exit code. The passed in ID should be a
     # Docker CID, not a Mesos container ID.
     st = deimos.state.State(self.state_root, docker_id=args[1])
     self.state = st
     deimos.sig.install(self.signal_docker_and_resume)
     st.await_launch()
     try:
         st.lock("wait", LOCK_SH, seconds=None)
     except IOError as err:
         # Allows for signal recovery: retry once after an interrupted call.
         if err.errno == errno.EINTR:
             st.lock("wait", LOCK_SH, 1)
         else:
             raise err
     if st.exit() is None:
         raise Err("Wait lock is not held nor is exit file present")
     return st.exit()
Exemplo n.º 8
0
 def destroy(self, destroy_pb, *args):
     """Stop the Docker container of a Mesos container unless it has exited.

     The Mesos container ID is read from ``destroy_pb.container_id.value``.
     After waiting for launch and taking an exclusive "destroy" lock,
     ``docker stop`` is run when ``state.exit()`` is None; otherwise the
     container is only reported as stopped in the log.

     Always returns 0.
     """
     log.info(" ".join(args))
     mesos_cid = destroy_pb.container_id.value
     st = deimos.state.State(self.state_root, mesos_id=mesos_cid)
     st.await_launch()
     # NOTE(review): the lock object is bound to a local; presumably that
     # keeps the lock held until this method returns -- confirm in
     # deimos.flock.
     destroy_lock = st.lock("destroy", LOCK_EX)
     if st.exit() is not None:
         log.info("Container is stopped")
     else:
         Run()(deimos.docker.stop(st.cid()))
     return 0
Exemplo n.º 9
0
 def destroy(self, container_id, *args):
     """Stop the Docker container of ``container_id`` unless it has exited.

     After waiting for launch and taking an exclusive "destroy" lock,
     ``docker stop`` is run when no exit value is recorded
     (``state.exit()`` is None); otherwise the container is only reported
     as stopped in the log.  When standard output is still open, an
     ``ExternalStatus`` message "destroy: ok" is written with ``proto_out``.

     Always returns 0.
     """
     log.info(" ".join([container_id] + list(args)))
     state = deimos.state.State(self.state_root, mesos_id=container_id)
     state.await_launch()
     # NOTE(review): the lock object is bound to a local; presumably that
     # keeps the lock held until this method returns -- confirm in
     # deimos.flock.
     lk_d = state.lock("destroy", LOCK_EX)
     # The test used to be inverted ("is not None"), which skipped the stop
     # for containers without an exit value and logged them as stopped.
     if state.exit() is None:
         Run()(deimos.docker.stop(state.cid()))
     else:
         log.info("Container is stopped")
     if not sys.stdout.closed:
         # If we're called as part of the signal handler set up by launch,
         # STDOUT is probably closed already. Writing the Protobuf would
         # only result in a bevy of error messages.
         proto_out(protos.ExternalStatus, message="destroy: ok")
     return 0
Exemplo n.º 10
0
 def destroy(self, container_id, *args):
     """Stop the Docker container of ``container_id`` unless it has exited.

     After waiting for launch and taking an exclusive "destroy" lock,
     ``docker stop`` is run when no exit value is recorded
     (``state.exit()`` is None); otherwise the container is only reported
     as stopped in the log.  When standard output is still open, an
     ``ExternalStatus`` message "destroy: ok" is written with ``proto_out``.

     Always returns 0.
     """
     log.info(" ".join([container_id] + list(args)))
     state = deimos.state.State(self.state_root, mesos_id=container_id)
     state.await_launch()
     # NOTE(review): the lock object is bound to a local; presumably that
     # keeps the lock held until this method returns -- confirm in
     # deimos.flock.
     lk_d = state.lock("destroy", LOCK_EX)
     # The test used to be inverted ("is not None"), which skipped the stop
     # for containers without an exit value and logged them as stopped.
     if state.exit() is None:
         Run()(deimos.docker.stop(state.cid()))
     else:
         log.info("Container is stopped")
     if not sys.stdout.closed:
         # If we're called as part of the signal handler set up by launch,
         # STDOUT is probably closed already. Writing the Protobuf would
         # only result in a bevy of error messages.
         proto_out(protos.ExternalStatus, message="destroy: ok")
     return 0
Exemplo n.º 11
0
 def destroy(self, destroy_pb, *args):
     """Unwire a running container from MidoNet, then stop it.

     The Mesos container ID is read from ``destroy_pb.container_id.value``.
     After waiting for launch and taking an exclusive "destroy" lock, a
     container whose ``state.exit()`` is not None is only reported as
     stopped.  Otherwise ``midonet.unwire_container_from_midonet`` is
     called with ``state.docker_id``; any exception from that step is
     logged with its traceback and does not prevent the ``docker stop``
     that follows.

     Always returns 0.
     """
     log.info(" ".join(args))
     mesos_cid = destroy_pb.container_id.value
     st = deimos.state.State(self.state_root, mesos_id=mesos_cid)
     st.await_launch()
     # NOTE(review): the lock object is bound to a local; presumably that
     # keeps the lock held until this method returns -- confirm in
     # deimos.flock.
     destroy_lock = st.lock("destroy", LOCK_EX)
     if st.exit() is not None:
         log.info("Container is stopped")
         return 0
     docker_cid = st.docker_id
     log.debug("Unwiring the container %s from MidoNet", docker_cid)
     try:
         log.info("state_root = %s", dir(st))
         midonet.unwire_container_from_midonet(docker_cid)
         log.debug("Successfully unwired the container %s "
                   "from MidoNet bridge", docker_cid)
     except Exception:
         # Best effort: a failed unwire must not block stopping the container.
         log.error(traceback.format_exc())
     Run()(deimos.docker.stop(st.cid()))
     return 0
Exemplo n.º 12
0
 def destroy(self, destroy_pb, *args):
     """Unwire a running container from MidoNet, then stop it.

     The Mesos container ID is read from ``destroy_pb.container_id.value``.
     After waiting for launch and taking an exclusive "destroy" lock, a
     container whose ``state.exit()`` is not None is only reported as
     stopped.  Otherwise ``midonet.unwire_container_from_midonet`` is
     called with ``state.docker_id``; any exception from that step is
     logged with its traceback and does not prevent the ``docker stop``
     that follows.

     Always returns 0.
     """
     log.info(" ".join(args))
     mesos_cid = destroy_pb.container_id.value
     st = deimos.state.State(self.state_root, mesos_id=mesos_cid)
     st.await_launch()
     # NOTE(review): the lock object is bound to a local; presumably that
     # keeps the lock held until this method returns -- confirm in
     # deimos.flock.
     destroy_lock = st.lock("destroy", LOCK_EX)
     if st.exit() is not None:
         log.info("Container is stopped")
         return 0
     docker_cid = st.docker_id
     log.debug("Unwiring the container %s from MidoNet", docker_cid)
     try:
         log.info("state_root = %s", dir(st))
         midonet.unwire_container_from_midonet(docker_cid)
         log.debug("Successfully unwired the container %s "
                   "from MidoNet bridge", docker_cid)
     except Exception:
         # Best effort: a failed unwire must not block stopping the container.
         log.error(traceback.format_exc())
     Run()(deimos.docker.stop(st.cid()))
     return 0
Exemplo n.º 13
0
 def wait(self, wait_pb, *args):
     """Wait for a container to finish and write out its termination.

     The Mesos container ID is read from ``wait_pb.container_id.value``.
     After launch, shared "observe" and "wait" locks are taken in that
     order, then a ``Termination`` message is written with
     ``recordio.write``.  Its status is the exit value shifted left by 8
     bits, with 64 standing in when no exit value is recorded.

     Returns ``state.exit()`` when it is not None.

     Raises ``Err`` when no exit value is available (after the message has
     been written), and re-raises any ``IOError`` from locking whose errno
     is not ``EINTR``.
     """
     log.info(" ".join(args))
     mesos_cid = wait_pb.container_id.value
     st = deimos.state.State(self.state_root, mesos_id=mesos_cid)
     self.state = st
     deimos.sig.install(self.stop_docker_and_resume)
     st.await_launch()
     # Wait for the observe lock so observe completes first
     lock_order = ("observe", "wait")
     try:
         for lock_name in lock_order:
             st.lock(lock_name, LOCK_SH, seconds=None)
     except IOError as err:
         # Allows for signal recovery: after an interrupted call both locks
         # are requested again, this time with seconds=1.
         if err.errno == errno.EINTR:
             for lock_name in lock_order:
                 st.lock(lock_name, LOCK_SH, seconds=1)
         else:
             raise err
     if st.exit() is not None:
         exit_value = st.exit()
     else:
         exit_value = 64
     recordio.write(Termination,
                    killed=False,
                    message="",
                    status=exit_value << 8)
     if st.exit() is None:
         raise Err("Wait lock is not held nor is exit file present")
     return st.exit()
Exemplo n.º 14
0
 def wait(self, wait_pb, *args):
     """Wait for a container to finish and write out its termination.

     The Mesos container ID is read from ``wait_pb.container_id.value``.
     After launch, shared "observe" and "wait" locks are taken in that
     order, then a ``Termination`` message is written with
     ``recordio.write``.  Its status is the exit value shifted left by 8
     bits, with 64 standing in when no exit value is recorded.

     Returns ``state.exit()`` when it is not None.

     Raises ``Err`` when no exit value is available (after the message has
     been written), and re-raises any ``IOError`` from locking whose errno
     is not ``EINTR``.
     """
     log.info(" ".join(args))
     mesos_cid = wait_pb.container_id.value
     st = deimos.state.State(self.state_root, mesos_id=mesos_cid)
     self.state = st
     deimos.sig.install(self.stop_docker_and_resume)
     st.await_launch()
     # Wait for the observe lock so observe completes first
     lock_order = ("observe", "wait")
     try:
         for lock_name in lock_order:
             st.lock(lock_name, LOCK_SH, seconds=None)
     except IOError as err:
         # Allows for signal recovery: after an interrupted call both locks
         # are requested again, this time with seconds=1.
         if err.errno == errno.EINTR:
             for lock_name in lock_order:
                 st.lock(lock_name, LOCK_SH, seconds=1)
         else:
             raise err
     if st.exit() is not None:
         exit_value = st.exit()
     else:
         exit_value = 64
     recordio.write(Termination,
                    killed=False,
                    message="",
                    status=exit_value << 8)
     if st.exit() is None:
         raise Err("Wait lock is not held nor is exit file present")
     return st.exit()
Exemplo n.º 15
0
 def wait(self, *args):
     """Wait for a container to finish and write out its termination.

     When the first argument is "--observe-docker" (or the
     "@@observe-docker@@" variant) the Docker container ID is taken from
     ``args[1]`` and only the "wait" lock is taken.  Otherwise a ``Wait``
     message is read with ``recordio.read`` to obtain the Mesos container
     ID, and the "observe" lock is taken before the "wait" lock.

     A ``Termination`` message is then written; its status is the exit
     value shifted left by 8 bits, with 64 standing in when no exit value
     is recorded.

     Returns ``state.exit()`` when it is not None.

     Raises ``Err`` when no exit value is available (after the message has
     been written), and re-raises any ``IOError`` from locking whose errno
     is not ``EINTR``.
     """
     log.info(" ".join(args))
     # NB: The "@@observe-docker@@" variant is a work around for Mesos's
     #     option parser. There is a fix in the pipeline.
     observer_flags = (["--observe-docker"], ["@@observe-docker@@"])
     observe = list(args[0:1]) in observer_flags
     if observe:
         # In Docker mode, we use Docker wait to wait for the container
         # and then exit with the returned exit code. The Docker CID is
         # passed on the command line.
         st = deimos.state.State(self.state_root, docker_id=args[1])
     else:
         request = recordio.read(Wait)
         mesos_cid = request.container_id.value
         st = deimos.state.State(self.state_root, mesos_id=mesos_cid)
     self.state = st
     deimos.sig.install(self.stop_docker_and_resume)
     st.await_launch()
     # The observer itself skips the "observe" lock.
     lock_order = ("wait",) if observe else ("observe", "wait")
     try:
         for lock_name in lock_order:
             st.lock(lock_name, LOCK_SH, seconds=None)
     except IOError as err:
         # Allows for signal recovery: after an interrupted call the locks
         # are requested again, this time with seconds=1.
         if err.errno == errno.EINTR:
             for lock_name in lock_order:
                 st.lock(lock_name, LOCK_SH, seconds=1)
         else:
             raise err
     if st.exit() is not None:
         exit_value = st.exit()
     else:
         exit_value = 64
     recordio.write(Termination,
                    killed=False,
                    message="",
                    status=exit_value << 8)
     if st.exit() is None:
         raise Err("Wait lock is not held nor is exit file present")
     return st.exit()
Exemplo n.º 16
0
    def launch(self, launch_pb, *args):
        """Start the task's Docker container and watch it until it exits.

        Builds a ``docker run`` command line from ``launch_pb`` (image,
        options, environment, ports, CPU and memory), starts it with its
        output redirected to the files "stdout" and "stderr" in the current
        directory, and optionally starts an observer executor.  It then
        blocks on ``docker wait``, records the result with ``state.exit``
        and reaps the child processes.

        Unless "--no-fork" is among ``args``, the process forks once the
        container ID is known: the parent returns None right away and the
        child carries on as the watcher.

        Returns the value of ``state.exit()`` in the watcher process.
        """
        log.info(" ".join(args))
        fork = "--no-fork" not in args
        deimos.sig.install(self.log_signal)
        run_options = []
        launchy = deimos.mesos.Launch(launch_pb)
        state = deimos.state.State(self.state_root,
                                   mesos_id=launchy.container_id)
        state.push()
        lk_l = state.lock("launch", LOCK_EX)
        state.executor_id = launchy.executor_id
        state.push()
        state.ids()
        mesos_directory()  # Redundant?
        if launchy.directory:
            os.chdir(launchy.directory)
        # TODO: if launchy.user:
        #           os.seteuid(launchy.user)
        url, options = launchy.container
        # Anything after "//" in the options replaces the task's own argv.
        options, trailing_argv = split_on(options, "//")
        url, options = self.container_settings.override(url, options)

        true_argv = launchy.argv if trailing_argv is None else trailing_argv

        image = self.determine_image(url, launchy)
        log.info("image  = %s", image)
        run_options += ["--sig-proxy"]
        run_options += ["--rm"]       # This is how we ensure container cleanup
        run_options += ["--cidfile", state.resolve("cid")]

        place_uris(launchy, self.shared_dir, self.optimistic_unpack)
        run_options += ["-w", self.workdir]

        # Docker requires an absolute path to a source filesystem, separated
        # from the bind path in the container with a colon, but the absolute
        # path to the Mesos sandbox might have colons in it (TaskIDs with
        # timestamps can cause this situation). So we create a soft link to it
        # and mount that.
        shared_full = os.path.abspath(self.shared_dir)
        sandbox_symlink = state.sandbox_symlink(shared_full)
        run_options += ["-v", "%s:%s" % (sandbox_symlink, self.workdir)]

        cpus, mems = launchy.cpu_and_mem
        env = launchy.env
        run_options += options

        # We need to wrap the call to Docker in a call to the Mesos executor
        # if no executor is passed as part of the task. We need to pass the
        # MESOS_* environment variables in to the container if we're going to
        # start an executor.
        observer_argv = None
        if launchy.needs_observer:
            # NB: The "@@docker@@" variant is a work around for Mesos's option
            # parser. There is a fix in the pipeline.
            observer_argv = [mesos_executor(), "--override",
                             deimos.path.me(), "observe", state.mesos_id]
            state.lock("observe", LOCK_EX | LOCK_NB)     # Explanation of Locks
            # When the observer is running, we would like its call to
            # observe() to finish before all the wait(); and we'd like the
            # observer to have a chance to report TASK_FINISHED before the
            # calls to wait() report their results (which would result in a
            # TASK_FAILED).
            #
            # For this reason, we take the "observe" lock in launch(), before
            # we call the observer and before releasing the "launch" or "wait"
            # locks.
            #
            # Calls to observe() actually skip locking "observe"; but wait()
            # calls must take this lock. The "observe" lock is held by
            # launch() until the observer executor completes, at which point
            # we can be reasonably sure its status was propagated to the Mesos
            # slave.
        else:
            env += mesos_env() + [("MESOS_DIRECTORY", self.workdir)]

        self.place_dockercfg()

        runner_argv = deimos.docker.run(run_options, image, true_argv,
                                        env=env, ports=launchy.ports,
                                        cpus=cpus, mems=mems)

        log_mesos_env(logging.DEBUG)

        observer = None
        with open("stdout", "w") as o:        # This awkward multi 'with' is a
            with open("stderr", "w") as e:    # concession to 2.6 compatibility
                with open(os.devnull) as devnull:
                    log.info(deimos.cmd.present(runner_argv))
                    self.runner = subprocess.Popen(runner_argv, stdin=devnull,
                                                                stdout=o,
                                                                stderr=e)
                    state.pid(self.runner.pid)
                    state.await_cid()
                    state.push()
                    # Take "wait" before releasing "launch" so there is no
                    # moment at which neither lock is held.
                    lk_w = state.lock("wait", LOCK_EX)
                    lk_l.unlock()
                    if fork:
                        pid = os.fork()
                        # Compare by value: identity tests against an int
                        # literal depend on interpreter internals.
                        if pid != 0:
                            state.ids()
                            log.info("Forking watcher into child...")
                            return
                    state.ids()
                    if observer_argv is not None:
                        log.info(deimos.cmd.present(observer_argv))
                        call = deimos.cmd.in_sh(observer_argv, allstderr=False)
                        # TODO: Collect these leaking file handles.
                        obs_out = open(state.resolve("observer.out"), "w+")
                        obs_err = open(state.resolve("observer.err"), "w+")
                        # If the Mesos executor sees LIBPROCESS_PORT=0 (which
                        # is passed by the slave) there are problems when it
                        # attempts to bind. ("Address already in use").
                        # Purging both LIBPROCESS_* net variables, to be safe.
                        for v in ["LIBPROCESS_PORT", "LIBPROCESS_IP"]:
                            if v in os.environ:
                                del os.environ[v]
                        observer = subprocess.Popen(call, stdin=devnull,
                                                          stdout=obs_out,
                                                          stderr=obs_err,
                                                          close_fds=True)
        data = Run(data=True)(deimos.docker.wait(state.cid()))
        state.exit(data)
        lk_w.unlock()
        # Reap both children, escalating to SIGTERM and then SIGKILL when a
        # process does not finish in time.
        for p, arr in [(self.runner, runner_argv), (observer, observer_argv)]:
            if p is None:
                continue
            thread = threading.Thread(target=p.wait)
            thread.start()
            thread.join(10)
            if thread.is_alive():
                log.warning(deimos.cmd.present(arr, "SIGTERM after 10s"))
                p.terminate()
            thread.join(1)
            if thread.is_alive():
                log.warning(deimos.cmd.present(arr, "SIGKILL after 1s"))
                p.kill()
            msg = deimos.cmd.present(arr, p.wait())
            if p.wait() == 0:
                log.info(msg)
            else:
                log.warning(msg)
        return state.exit()
Exemplo n.º 17
0
    def launch(self, container_id, *args):
        """Start the task's Docker container and watch it until it exits.

        Reads a serialized ``TaskInfo`` from standard input, builds a
        ``docker run`` command line from it and starts it with its output
        redirected to the files "stdout" and "stderr" in the current
        directory.  Once the container ID is known, an ``ExternalStatus``
        message "launch: ok" is written and standard output is closed.  An
        observer process is started when the task needs the executor
        wrapper.  The method then blocks on ``docker wait`` and records the
        result with ``state.exit``.

        Returns the value of ``state.exit()``.

        Raises ``Err`` when the container URL is not of the form
        ``docker:///<image>``, or when the task needs the executor wrapper
        but ``args`` does not start with "--mesos-executor"/"--observer"
        followed by at least one more argument.
        """
        log.info(" ".join([container_id] + list(args)))
        deimos.sig.install(self.sig_proxy)
        run_options = []
        state = deimos.state.State(self.state_root, mesos_id=container_id)
        state.push()
        lk_l = state.lock("launch", LOCK_EX)
        mesos_directory()
        task = protos.TaskInfo()
        task.ParseFromString(sys.stdin.read())
        for line in proto_lines(task):
            log.debug(line)
        state.executor_id = executor_id(task)
        state.push()
        state.ids()
        url, options = self.container_settings.override(*container(task))
        # partition() always yields three parts, so a URL lacking the
        # "docker:///" marker reaches the Err below instead of failing with
        # a ValueError while unpacking the result of split().
        pre, marker, image = url.partition("docker:///")
        if marker == "" or pre != "":
            raise Err("URL '%s' is not a valid docker:// URL!" % url)
        if image == "":
            image = self.default_image(task)
        log.info("image  = %s", image)
        run_options += ["--sig-proxy"]
        run_options += ["--rm"]  # This is how we ensure container cleanup
        run_options += ["--cidfile", state.resolve("cid")]

        place_uris(task, self.shared_dir, self.optimistic_unpack)
        run_options += ["-w", self.workdir]

        # Docker requires an absolute path to a source filesystem, separated
        # from the bind path in the container with a colon, but the absolute
        # path to the Mesos sandbox might have colons in it (TaskIDs with
        # timestamps can cause this situation). So we create a soft link to it
        # and mount that.
        shared_full = os.path.abspath(self.shared_dir)
        sandbox_symlink = state.sandbox_symlink(shared_full)
        run_options += ["-v", "%s:%s" % (sandbox_symlink, self.workdir)]

        cpus, mems = cpu_and_mem(task)
        env = [(_.name, _.value) for _ in task.command.environment.variables]
        run_options += options

        # We need to wrap the call to Docker in a call to the Mesos executor
        # if no executor is passed as part of the task. We need to pass the
        # MESOS_* environment variables in to the container if we're going to
        # start an executor.
        observer_argv = None
        if needs_executor_wrapper(task):
            options = ["--mesos-executor", "--observer"]
            if not (len(args) > 1 and args[0] in options):
                raise Err("Task %s needs --observer to be set!" % state.eid())
            observer_argv = list(
                args[1:]) + [deimos.path.me(), "wait", "--docker"]
        else:
            env += mesos_env() + [("MESOS_DIRECTORY", self.workdir)]

        runner_argv = deimos.docker.run(run_options,
                                        image,
                                        argv(task),
                                        env=env,
                                        ports=ports(task),
                                        cpus=cpus,
                                        mems=mems)

        log_mesos_env(logging.DEBUG)

        observer = None
        with open("stdout", "w") as o:  # This awkward multi 'with' is a
            with open("stderr", "w") as e:  # concession to 2.6 compatibility
                with open(os.devnull) as devnull:
                    log.info(deimos.cmd.present(runner_argv))
                    self.runner = subprocess.Popen(runner_argv,
                                                   stdin=devnull,
                                                   stdout=o,
                                                   stderr=e)
                    state.pid(self.runner.pid)
                    state.await_cid()
                    state.push()
                    # Take "wait" before releasing "launch" so there is no
                    # moment at which neither lock is held.
                    lk_w = state.lock("wait", LOCK_EX)
                    lk_l.unlock()
                    state.ids()
                    proto_out(protos.ExternalStatus, message="launch: ok")
                    sys.stdout.close()  # Mark STDOUT as closed for Python code
                    os.close(1)  # Use low-level call to close OS side of STDOUT
                    if observer_argv is not None:
                        # The observer is handed the Docker CID as its last
                        # argument.
                        observer_argv += [state.cid()]
                        log.info(deimos.cmd.present(observer_argv))
                        call = deimos.cmd.in_sh(observer_argv)
                        # TODO: Collect these leaking file handles.
                        obs_out = open(state.resolve("observer.out"), "w+")
                        obs_err = open(state.resolve("observer.err"), "w+")
                        # If the Mesos executor sees LIBPROCESS_PORT=0 (which
                        # is passed by the slave) there are problems when it
                        # attempts to bind. ("Address already in use").
                        # Purging both LIBPROCESS_* net variables, to be safe.
                        for v in ["LIBPROCESS_PORT", "LIBPROCESS_IP"]:
                            if v in os.environ:
                                del os.environ[v]
                        observer = subprocess.Popen(call,
                                                    stdin=devnull,
                                                    stdout=obs_out,
                                                    stderr=obs_err,
                                                    close_fds=True)
        data = Run(data=True)(deimos.docker.wait(state.cid()))
        state.exit(data)
        lk_w.unlock()
        # Reap the children; only non-zero exit statuses are logged.
        for p, arr in [(self.runner, runner_argv), (observer, observer_argv)]:
            if p is None or p.wait() == 0:
                continue
            log.warning(deimos.cmd.present(arr, p.wait()))
        return state.exit()
Exemplo n.º 18
0
    def launch(self, container_id, *args):
        """Start the task's Docker container and watch it until it exits.

        Reads a serialized ``TaskInfo`` from standard input, builds a
        ``docker run`` command line from it and starts it with its output
        redirected to the files "stdout" and "stderr" in the current
        directory.  Once the container ID is known, an ``ExternalStatus``
        message "launch: ok" is written and standard output is closed.  An
        observer process is started when the task needs the executor
        wrapper.  The method then blocks on ``docker wait`` and records the
        result with ``state.exit``.

        Returns the value of ``state.exit()``.

        Raises ``Err`` when the container URL is not of the form
        ``docker:///<image>``, or when the task needs the executor wrapper
        but ``args`` does not start with "--mesos-executor"/"--executor"
        followed by at least one more argument.
        """
        log.info(" ".join([container_id] + list(args)))
        deimos.sig.install(self.sig_proxy)
        run_options = []
        state = deimos.state.State(self.state_root, mesos_id=container_id)
        state.push()
        lk_l = state.lock("launch", LOCK_EX)
        mesos_directory()
        task = protos.TaskInfo()
        task.ParseFromString(sys.stdin.read())
        state.executor_id = executor_id(task)
        state.push()
        state.ids()
        url, options = self.container_settings.override(*container(task))
        # partition() always yields three parts, so a URL lacking the
        # "docker:///" marker reaches the Err below instead of failing with
        # a ValueError while unpacking the result of split().
        pre, marker, image = url.partition("docker:///")
        if marker == "" or pre != "":
            raise Err("URL '%s' is not a valid docker:// URL!" % url)
        if image == "":
            image = self.default_image(task)
        log.info("image  = %s", image)
        run_options += ["--sig-proxy"]
        run_options += ["--rm"]       # This is how we ensure container cleanup
        run_options += ["--cidfile", state.resolve("cid")]

        place_uris(task, self.shared_dir, self.optimistic_unpack)
        run_options += ["-w", self.workdir]

        # Docker requires an absolute path to a source filesystem, separated
        # from the bind path in the container with a colon, but the absolute
        # path to the Mesos sandbox might have colons in it (TaskIDs with
        # timestamps can cause this situation). So we create a soft link to it
        # and mount that.
        shared_full = os.path.abspath(self.shared_dir)
        sandbox_symlink = state.sandbox_symlink(shared_full)
        run_options += ["-v", "%s:%s" % (sandbox_symlink, self.workdir)]

        cpus, mems = cpu_and_mem(task)
        env = [(_.name, _.value) for _ in task.command.environment.variables]
        run_options += options

        # We need to wrap the call to Docker in a call to the Mesos executor
        # if no executor is passed as part of the task. We need to pass the
        # MESOS_* environment variables in to the container if we're going to
        # start an executor.
        observer_argv = None
        if needs_executor_wrapper(task):
            options = ["--mesos-executor", "--executor"]
            if not (len(args) > 1 and args[0] in options):
                raise Err("Task %s needs --executor to be set!" % state.tid())
            observer_argv = [args[1], deimos.path.me(), "wait", "--docker"]
        else:
            env += mesos_env() + [("MESOS_DIRECTORY", self.workdir)]

        runner_argv = deimos.docker.run(run_options, image, argv(task),
                                        env=env, ports=ports(task),
                                        cpus=cpus, mems=mems)

        log_mesos_env(logging.DEBUG)

        observer = None
        with open("stdout", "w") as o:        # This awkward multi 'with' is a
            with open("stderr", "w") as e:    # concession to 2.6 compatibility
                with open(os.devnull) as devnull:
                    log.info(deimos.cmd.present(runner_argv))
                    self.runner = subprocess.Popen(runner_argv, stdin=devnull,
                                                                stdout=o,
                                                                stderr=e)
                    state.pid(self.runner.pid)
                    state.await_cid()
                    state.push()
                    # Take "wait" before releasing "launch" so there is no
                    # moment at which neither lock is held.
                    lk_w = state.lock("wait", LOCK_EX)
                    lk_l.unlock()
                    state.ids()
                    proto_out(protos.ExternalStatus, message="launch: ok")
                    sys.stdout.close()  # Mark STDOUT as closed for Python code
                    os.close(1)  # Use low-level call to close OS side of STDOUT
                    if observer_argv is not None:
                        # The observer is handed the Docker CID as its last
                        # argument.
                        observer_argv += [state.cid()]
                        log.info(deimos.cmd.present(observer_argv))
                        call = deimos.cmd.in_sh(observer_argv)
                        # TODO: Collect these leaking file handles.
                        obs_out = open(state.resolve("observer.out"), "w+")
                        obs_err = open(state.resolve("observer.err"), "w+")
                        # If the Mesos executor sees LIBPROCESS_PORT=0 (which
                        # is passed by the slave) there are problems when it
                        # attempts to bind. ("Address already in use").
                        # Purging both LIBPROCESS_* net variables, to be safe.
                        for v in ["LIBPROCESS_PORT", "LIBPROCESS_IP"]:
                            if v in os.environ:
                                del os.environ[v]
                        observer = subprocess.Popen(call, stdin=devnull,
                                                          stdout=obs_out,
                                                          stderr=obs_err,
                                                          close_fds=True)
        data = Run(data=True)(deimos.docker.wait(state.cid()))
        state.exit(data)
        lk_w.unlock()
        # Reap the children; only non-zero exit statuses are logged.
        for p, arr in [(self.runner, runner_argv), (observer, observer_argv)]:
            if p is None or p.wait() == 0:
                continue
            log.warning(deimos.cmd.present(arr, p.wait()))
        return state.exit()
Exemplo n.º 19
0
    def launch(self, launch_pb, *args):
        """Start a Docker container for a Mesos launch request and watch it.

        The method builds the ``docker run`` options from the launch
        protobuf, starts the Docker runner, tries to wire the container to
        MidoNet, optionally forks so that the caller returns while the child
        keeps watching, optionally starts the observer executor, and finally
        waits on the container and records its exit status in the state
        directory.

        Args:
            launch_pb: Launch protobuf, handed to ``deimos.mesos.Launch``.
            *args: Command-line words. They are logged, and the presence of
                ``"--no-fork"`` disables the fork.

        Returns:
            ``None`` in the process where ``os.fork()`` returned a non-zero
            PID (the parent) when forking is enabled; otherwise the value of
            ``state.exit()`` once the container has been waited on.
        """
        log.info(" ".join(args))
        fork = "--no-fork" not in args
        deimos.sig.install(self.log_signal)
        run_options = []
        launchy = deimos.mesos.Launch(launch_pb)
        state = deimos.state.State(self.state_root,
                                   mesos_id=launchy.container_id)
        state.push()
        # Held until the container ID is known and the "wait" lock is taken.
        lk_l = state.lock("launch", LOCK_EX)
        state.executor_id = launchy.executor_id
        state.push()
        state.ids()
        mesos_directory()  # Redundant?
        if launchy.directory:
            os.chdir(launchy.directory)
        # TODO: if launchy.user:
        #           os.seteuid(launchy.user)
        url, options = launchy.container
        # Anything after "//" in the container options replaces the task's
        # own argv (see true_argv below).
        options, trailing_argv = split_on(options, "//")
        url, options = self.container_settings.override(url, options)

        true_argv = launchy.argv if trailing_argv is None else trailing_argv

        image = self.determine_image(url, launchy)
        log.info("image  = %s", image)
        run_options += ["--sig-proxy"]
        run_options += ["--rm"]  # This is how we ensure container cleanup
        run_options += ["--cidfile", state.resolve("cid")]

        place_uris(launchy, self.shared_dir, self.optimistic_unpack)
        run_options += ["-w", self.workdir]

        # Docker requires an absolute path to a source filesystem, separated
        # from the bind path in the container with a colon, but the absolute
        # path to the Mesos sandbox might have colons in it (TaskIDs with
        # timestamps can cause this situation). So we create a soft link to it
        # and mount that.
        shared_full = os.path.abspath(self.shared_dir)
        sandbox_symlink = state.sandbox_symlink(shared_full)
        run_options += ["-v", "%s:%s" % (sandbox_symlink, self.workdir)]

        cpus, mems = launchy.cpu_and_mem
        env = launchy.env
        run_options += options

        # Docker networking is disabled only when the task names a MidoNet
        # bridge explicitly.
        # NOTE(review): the wiring step further down runs regardless, using
        # hard-coded fallback values -- confirm this is intended.
        env_dict = dict(env)
        if env_dict.get("MIDONET_BRIDGE_ID", None):
            run_options += ["--net=none"]

        # We need to wrap the call to Docker in a call to the Mesos executor
        # if no executor is passed as part of the task. We need to pass the
        # MESOS_* environment variables in to the container if we're going to
        # start an executor.
        observer_argv = None
        if launchy.needs_observer:
            # NB: The "@@docker@@" variant is a work around for Mesos's option
            # parser. There is a fix in the pipeline.
            # NOTE(review): no "@@docker@@" appears in the argv below; this
            # remark may be stale.
            observer_argv = [
                mesos_executor(), "--override",
                deimos.path.me(), "observe", state.mesos_id
            ]
            state.lock("observe", LOCK_EX | LOCK_NB)  # Explanation of Locks
            # When the observer is running, we would like its call to
            # observe() to finish before all the wait(); and we'd like the
            # observer to have a chance to report TASK_FINISHED before the
            # calls to wait() report their results (which would result in a
            # TASK_FAILED).
            #
            # For this reason, we take the "observe" lock in launch(), before
            # we call the observer and before releasing the "launch" or "wait"
            # locks.
            #
            # Calls to observe() actually skip locking "observe"; but wait()
            # calls must take this lock. The "observe" lock is held by
            # launch() until the observer executor completes, at which point
            # we can be reasonably sure its status was propagated to the Mesos
            # slave.
        else:
            env += mesos_env() + [("MESOS_DIRECTORY", self.workdir)]

        self.place_dockercfg()

        runner_argv = deimos.docker.run(run_options,
                                        image,
                                        true_argv,
                                        env=env,
                                        ports=launchy.ports,
                                        cpus=cpus,
                                        mems=mems)

        log_mesos_env(logging.DEBUG)

        observer = None
        with open("stdout", "w") as o:  # This awkward multi 'with' is a
            with open("stderr", "w") as e:  # concession to 2.6 compatibility
                with open(os.devnull) as devnull:
                    log.info(deimos.cmd.present(runner_argv))
                    self.runner = subprocess.Popen(runner_argv,
                                                   stdin=devnull,
                                                   stdout=o,
                                                   stderr=e)
                    state.pid(self.runner.pid)
                    state.await_cid()
                    log.debug("Wiring the container to MidoNet")
                    # Best effort: a wiring failure is logged with its
                    # traceback and the launch carries on.
                    try:
                        bridge_id = env_dict.get(
                            "MIDONET_BRIDGE_ID",
                            "78488c47-d1de-4d16-a27a-4e6419dc4f88")
                        container_id = state.docker_id
                        ip_addr = env_dict.get("MIDONET_IP_ADDRESS",
                                               "192.168.100.42")
                        default_gw = env_dict.get("MIDONET_DEFAULT_GATEWAY",
                                                  None)
                        midonet.wire_container_to_midonet(
                            container_id, bridge_id, ip_addr, default_gw)
                        log.debug("Successfully wired the container %s to "
                                  "MidoNet bridge %s",
                                  container_id, bridge_id)
                    except Exception:
                        log.error(traceback.format_exc())

                    state.push()
                    # Take "wait" before releasing "launch" so there is no
                    # moment at which neither lock is held.
                    lk_w = state.lock("wait", LOCK_EX)
                    lk_l.unlock()
                    if fork:
                        pid = os.fork()
                        # Compare by value: identity checks against an int
                        # literal rely on interpreter-specific caching.
                        if pid != 0:
                            state.ids()
                            log.info("Forking watcher into child...")
                            return
                    state.ids()
                    if observer_argv is not None:
                        log.info(deimos.cmd.present(observer_argv))
                        call = deimos.cmd.in_sh(observer_argv, allstderr=False)
                        # If the Mesos executor sees LIBPROCESS_PORT=0 (which
                        # is passed by the slave) there are problems when it
                        # attempts to bind. ("Address already in use").
                        # Purging both LIBPROCESS_* net variables, to be safe.
                        for v in ["LIBPROCESS_PORT", "LIBPROCESS_IP"]:
                            if v in os.environ:
                                del os.environ[v]
                        # The observer process gets its own copies of these
                        # descriptors, so ours are closed as soon as Popen
                        # returns (or fails) instead of being leaked.
                        obs_out = open(state.resolve("observer.out"), "w+")
                        try:
                            obs_err = open(state.resolve("observer.err"),
                                           "w+")
                            try:
                                observer = subprocess.Popen(call,
                                                            stdin=devnull,
                                                            stdout=obs_out,
                                                            stderr=obs_err,
                                                            close_fds=True)
                            finally:
                                obs_err.close()
                        finally:
                            obs_out.close()
        data = Run(data=True)(deimos.docker.wait(state.cid()))
        state.exit(data)
        lk_w.unlock()
        # Reap the runner and (if started) the observer: allow 10s for a
        # normal exit, then SIGTERM, then SIGKILL one second later.
        for p, arr in [(self.runner, runner_argv), (observer, observer_argv)]:
            if p is None:
                continue
            thread = threading.Thread(target=p.wait)
            thread.start()
            thread.join(10)
            if thread.is_alive():
                log.warning(deimos.cmd.present(arr, "SIGTERM after 10s"))
                p.terminate()
            thread.join(1)
            if thread.is_alive():
                log.warning(deimos.cmd.present(arr, "SIGKILL after 1s"))
                p.kill()
            code = p.wait()
            msg = deimos.cmd.present(arr, code)
            if code == 0:
                log.info(msg)
            else:
                log.warning(msg)
        return state.exit()