import time

# These functions also rely on get_cloudinit_for_destruction, em_core_logfetch and
# UnexpectedError being provided elsewhere in this module; the import below is implied
# by the epumgmt.defaults.child.child() call and may already exist higher up in the file.
import epumgmt.defaults.child

def _kill_controller(p, c, m, run_name, startsec, cloudinitd, epucontroller):
    c.log.info("Killing controller (at evaluation second %s)" % startsec)

    # TODO: fix this hardcoded service name
    cloudinitd_terminate = get_cloudinit_for_destruction(p, c, m, run_name)
    svc = cloudinitd_terminate.get_service('epu-onesleeper')
    svc.shutdown()
    epucontroller.terminate()

    cmd = 'cloudinitd repair %s' % run_name
    c.log.debug("command = '%s'" % cmd)
    timeout = 600.0  # seconds
    (k, rc, out, err) = epumgmt.defaults.child.child(cmd, timeout=timeout)
    if k:
        c.log.error("TIMED OUT: '%s'" % cmd)
    if not rc:
        c.log.debug("command succeeded: '%s'" % cmd)
        epucontroller.start()
    else:
        errmsg = "problem running command, "
        if rc < 0:
            errmsg += "killed by signal:"
        if rc > 0:
            errmsg += "exited non-zero:"
        errmsg += "'%s' ::: return code" % cmd
        errmsg += ": %d ::: error:\n%s\noutput:\n%s" % (rc, out, err)
        # these commands will commonly fail
        if c.trace:
            c.log.debug(errmsg)  # was self.c.log.debug, but there is no self in a module-level function
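# --- illustrative sketch, not part of the original module ---
# _kill_controller unpacks (k, rc, out, err) from epumgmt.defaults.child.child().
# The stand-in below only shows the contract that usage appears to assume: run a
# shell command with a timeout and return (killed_by_timeout, returncode, stdout,
# stderr).  The real helper (and the exact ordering of the output fields) may
# differ; treat this purely as a hedged sketch of the assumed interface.
import subprocess

def _child_sketch(cmd, timeout=600.0):
    """Hypothetical stand-in mirroring the assumed (k, rc, out, err) contract."""
    try:
        proc = subprocess.run(cmd, shell=True, capture_output=True, text=True,
                              timeout=timeout)
        return (False, proc.returncode, proc.stdout, proc.stderr)
    except subprocess.TimeoutExpired:
        # Timed out: k is True and there is no meaningful return code; partial
        # output is simply dropped in this sketch.
        return (True, None, "", "")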
def terminate(p, c, m, run_name, cloudinitd):
    """Destroy all VM instances that are part of the run.
    """
    m.remote_svc_adapter.initialize(m, run_name, cloudinitd)
    provisioner_kill = m.remote_svc_adapter.is_channel_open()
    if not provisioner_kill:
        c.log.warn("Problem with access to the services, cannot terminate workers without this channel")
        c.log.info("Killing only the cloudinit.d-launched nodes.")
    else:
        c.log.info("Terminating all workers in run '%s'" % run_name)
        if m.remote_svc_adapter.kill_all_workers():
            c.log.info("Terminated all workers in run '%s'" % run_name)
        else:
            c.log.error("Problem triggering worker termination, you need to make sure these are terminated manually!")
            c.log.info("Fetching provisioner logs")
            em_core_logfetch.fetch_by_service_name(p, c, m, run_name, "provisioner")
            c.log.info("Fetched provisioner logs")
            raise UnexpectedError("Problem triggering worker termination, you need to make sure these are terminated manually!")

        em_core_logfetch.fetch_by_service_name(p, c, m, run_name, "provisioner")

    c.log.info("Shutting down all services launched by cloudinit.d for '%s'" % run_name)

    # Need a different instantiation of cloudinitd for shutdown
    cloudinitd_terminate = get_cloudinit_for_destruction(p, c, m, run_name)
    cloudinitd_terminate.shutdown()
    cloudinitd_terminate.block_until_complete(poll_period=1.0)
    c.log.info("Shutdown all services launched by cloudinit.d for '%s'" % run_name)
def terminate(p, c, m, run_name, cloudinitd):
    """Destroy all VM instances that are part of the run.
    """
    m.remote_svc_adapter.initialize(m, run_name, cloudinitd)
    provisioner_kill = m.remote_svc_adapter.is_channel_open()
    if not provisioner_kill:
        c.log.warn("Problem with access to the services, cannot terminate workers without this channel")
        c.log.info("Killing only the cloudinit.d-launched nodes.")
    else:
        if not m.remote_svc_adapter.kill_all_workers():
            raise UnexpectedError("Problem triggering worker termination, you need to make sure these are terminated manually!")

        # TODO: here, we need to make sure the provisioner is done killing things with some
        # mechanism like RPC.  This will require some thought and design.  For now, this
        # happens fairly instantly if the IaaS service is available, etc.  But we should
        # know for sure before proceeding.
        c.log.info("Sent signal to the provisioner, waiting for it to terminate all workers in run '%s'" % run_name)
        time.sleep(5)

    # Need a different instantiation of cloudinitd for shutdown
    cloudinitd_terminate = get_cloudinit_for_destruction(p, c, m, run_name)
    cloudinitd_terminate.shutdown()
    cloudinitd_terminate.block_until_complete(poll_period=1.0)
    c.log.info("Shutdown all services launched by cloudinit.d for '%s'" % run_name)
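# --- illustrative sketch, not part of the original module ---
# The TODO above points out that time.sleep(5) only approximates "the provisioner is
# done killing workers".  A bounded poll loop is one possible shape for that check;
# note that remote_svc_adapter.workers_are_terminated() is a hypothetical query
# invented here for illustration, not an existing adapter method.
def _wait_for_worker_termination(c, m, run_name, timeout=120.0, poll_period=2.0):
    """Poll a (hypothetical) adapter query until workers are gone or we time out."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if m.remote_svc_adapter.workers_are_terminated():  # hypothetical call
            c.log.info("All workers in run '%s' are terminated" % run_name)
            return True
        time.sleep(poll_period)
    c.log.warn("Timed out waiting for worker termination in run '%s'" % run_name)
    return False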