def __get_instances(self, props, non_existent=False):
    """Yield a state record for each VM named in the given cloud prop dicts.

    Each record is a dict with vm_name, vm_state ("VM_DIRTY" when the VM
    exists on at least one host, "VM_NON_EXISTENT" otherwise), the list of
    host connections where it was found, and the originating prop dict.
    Records for missing VMs are only produced when non_existent is True.
    """
    conns_by_vm = {}
    pool = util.TaskPool()

    def collect(conn):
        # refresh the host's domain info, then register every VM it hosts
        conn.refresh()
        for name in conn.dominfo.vms:
            conns_by_vm.setdefault(name, []).append(conn)

    for conn in self.conns():
        pool.apply_async(collect, [conn])
    pool.wait_all()

    # collapse props to one entry per vm_name
    prop_by_name = dict((prop["vm_name"], prop) for prop in props)
    for name, prop in prop_by_name.iteritems():
        hosting_conns = conns_by_vm.get(name)
        if hosting_conns:
            yield dict(vm_name=name, vm_state="VM_DIRTY",
                       vm_conns=hosting_conns, prop=prop)
        elif non_existent:
            yield dict(vm_name=name, vm_state="VM_NON_EXISTENT",
                       vm_conns=[], prop=prop)
def __vm_async_apply(self, props, op, *args):
    """Call method `op` with `args` on every VM matching `props`, in parallel.

    Returns a dict mapping each VM's name to an empty dict (the per-VM
    results are not collected; the value is a placeholder).
    """
    outcome = {}
    pool = util.TaskPool()
    for vm in self.__get_vms(props):
        pool.apply_async(getattr(vm, op), args)
        outcome[vm.name] = {}
    pool.wait_all()
    return outcome
def terminate_instances(self, props):
    """
    Terminate instances specified in the given sequence of cloud
    properties dicts.  Deletion of each (VM, host-connection) pair is
    dispatched to a task pool and this call blocks until all finish.
    """
    pool = util.TaskPool()
    for record in self.__get_instances(props):
        # a VM may exist on several hosts; delete it from each of them
        for conn in record["vm_conns"]:
            pool.apply_async(self.delete_vm, [conn, record])
    pool.wait_all()
def __get_all_vms(self):
    """Return {vm_name: [host connections hosting it]} across all hosts.

    Host refreshes run concurrently via a task pool; a VM cloned onto
    several hosts appears with multiple connections in its list.
    """
    located = {}
    pool = util.TaskPool()

    def collect(conn):
        conn.refresh()
        for name in conn.dominfo.vms:
            located.setdefault(name, []).append(conn)

    for conn in self.conns():
        pool.apply_async(collect, [conn])
    pool.wait_all()
    return located
def conns(self):
    """Return the list of online libvirt host connections.

    On first use, connects to every configured host in parallel; hosts that
    fail to connect are logged and skipped.  Raises LVPError if no host
    could be reached.  Subsequent calls return the cached connection list.
    """
    if self.hosts_online is None:
        if libvirt.getVersion() < 9004:
            # libvirt support for no_verify was introduced in 0.9.4:
            # on older versions pre-seed SSH known_hosts for each host by
            # running a throwaway command over ssh first.
            procs = []
            for hostport in self.hosts.iterkeys():
                host, _, port = hostport.partition(':')
                args = ["/usr/bin/ssh", "-oBatchMode=yes",
                        "-oStrictHostKeyChecking=no",
                        "-p{0}".format(port), "root@{0}".format(host),
                        "uptime"]
                procs.append(subprocess.Popen(args))
            for proc in procs:
                proc.wait()
        self.hosts_online = []

        def lv_connect(host, priority, weight):
            try:
                conn = PoniLVConn(host, hypervisor=self.hypervisor,
                                  keyfile=self.ssh_key,
                                  priority=priority, weight=weight)
                conn.connect()
                self.hosts_online.append(conn)
            except (LVPError, libvirt.libvirtError) as ex:
                # BUGFIX: log the host, not conn.uri -- 'conn' is unbound
                # (NameError) when the PoniLVConn constructor itself raises.
                self.log.warn("Connection to %r failed: %r", host, ex)

        tasks = util.TaskPool()
        for host, (priority, weight) in self.hosts.iteritems():
            tasks.apply_async(lv_connect, [host, priority, weight])
        tasks.wait_all()
    if not self.hosts_online:
        raise LVPError("No VM hosts available")
    return list(self.hosts_online)
def wait_instances(self, props, wait_state="running"):
    """
    Wait for all the given instances to reach status specified by
    the 'wait_state' argument.

    Deletes stale/duplicate VMs, clones missing ones, then polls over an
    SSH tunnel until every instance reports an IPv4 address (unless the
    instance's ipproto address is already known).

    Returns a dict {instance_id: dict(<updated properties>)}
    """
    assert wait_state == "running", "libvirt only handles running stuff"
    home = os.getenv("HOME")
    # collapse props to one entry per vm_name
    props = dict((prop["vm_name"], prop) for prop in props).values()
    instances = self.__get_instances(props)
    self.log.info("deleting existing VM instances")
    delete_started = time.time()
    tasks = util.TaskPool()
    for instance in instances:
        # Delete any existing instances if required to reinit (the
        # default) or if the same VM was found from multiple hosts.
        if instance["prop"].get("reinit", True) or len(instance["vm_conns"]) > 1:
            for conn in instance["vm_conns"]:
                tasks.apply_async(self.delete_vm, [conn, instance])
    tasks.wait_all()
    cloning_started = time.time()

    def clone_instance(instance):
        # NOTE: closes over 'instances', 'conns' and 'home' from the
        # enclosing scope; 'instances' is rebound to a fresh list and
        # 'conns' assigned before this is ever invoked by the task pool.
        prop = instance["prop"]
        ipv6pre = prop.get("ipv6_prefix")
        if instance["vm_state"] == "VM_RUNNING":
            return  # done
        elif instance["vm_state"] == "VM_DIRTY":
            # turn this into an active instance
            vm = instance["vm_conns"][0].dominfo.vms[instance["vm_name"]]
        elif instance["vm_state"] == "VM_NON_EXISTENT":
            # Select the best place for this host first filtering out nodes
            # with zero-weight and ones included in the exclude list or
            # missing from the include list.
            cands = list(conns)
            if prop.get("hosts", {}).get("exclude"):
                cands = [conn for conn in cands
                         if prop["hosts"]["exclude"] not in conn.host]
            if prop.get("hosts", {}).get("include"):
                cands = [conn for conn in cands
                         if prop["hosts"]["include"] in conn.host]
            conn = self.weighted_random_choice(cands)
            self.log.info("cloning %r on %r", instance["vm_name"], conn.host)
            vm = conn.clone_vm(instance["vm_name"], prop, overwrite=True)
            instance["vm_conns"] = [conn]
        else:
            return  # XXX
        instance["vm_state"] = "VM_RUNNING"
        instance["ipproto"] = prop.get("ipproto", "ipv4")
        instance["ipv6"] = vm.ipv6_addr(ipv6pre)[0]
        instance["ssh_key"] = "{0}/.ssh/{1}".format(home, prop["ssh_key"])
        instances.append(instance)

    self.log.info("cloning VM instances")
    instances = []
    # only hosts with a positive weight are clone candidates
    conns = [conn for conn in self.conns() if conn.srv_weight > 0]
    tasks = util.TaskPool()
    for instance in self.__get_instances(props, non_existent=True):
        tasks.apply_async(clone_instance, [instance])
    tasks.wait_all()
    boot_started = time.time()

    # get ipv4 addresses for the hosts (XXX: come up with something better)
    result = {}
    tunnels = {}
    failed = []
    objs = []  # keep channel/client objects referenced so they stay open
    timeout = 120
    start = time.time()
    for attempt in xrange(1, 1000):
        elapsed = time.time() - start
        if elapsed > timeout:
            raise LVPError("Connecting to {0!r} failed".format(failed))
        if attempt > 1:
            time.sleep(2)
        self.log.info("getting ip addresses: round #%r, time spent=%.02fs",
                      attempt, elapsed)
        failed = []
        for instance in instances:
            instance_id = instance["vm_name"]
            if instance["ipproto"] in instance:
                # address already exists (ie lookup done or we're using ipv6)
                if instance_id not in result:
                    addr = instance[instance['ipproto']]
                    result[instance_id] = dict(host=addr,
                                               private=dict(ip=addr, dns=addr))
                continue
            conn = instance["vm_conns"][0]
            if conn not in tunnels:
                # one cached SSH connection per hypervisor host
                client = SSHClientLVP()
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                client.connect(conn.host, port=conn.port,
                               username=conn.username,
                               key_filename=conn.keyfile)
                tunnels[conn] = client
            trans = tunnels[conn].get_transport()
            ipv4 = None
            try:
                # tunnel to the guest's IPv6 address via the host connection
                tunchan = trans.open_channel("direct-tcpip",
                                             (instance["ipv6"], 22),
                                             ("localhost", 0))
                client = SSHClientLVP()
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                client.connect_socket(tunchan, username="******",
                                      key_filename=instance["ssh_key"])
                cmdchan = client.get_transport().open_session()
                cmdchan.set_combine_stderr(True)
                cmdchan.exec_command('ip -4 addr show scope global')
                cmdchan.shutdown_write()
                # poll for command completion for at most ten seconds
                exec_start = time.time()
                while (not cmdchan.exit_status_ready()) and \
                        ((time.time() - exec_start) < 10.0):
                    time.sleep(0.05)
                if cmdchan.exit_status_ready():
                    exit_code = cmdchan.recv_exit_status()
                    if exit_code != 0:
                        self.log.warning(
                            "remote command non-zero exit status: exitcode=%s, %r",
                            exit_code, instance)
                data = cmdchan.recv(1024)
                objs.extend((tunchan, cmdchan, client))
            except (socket.error, socket.gaierror,
                    paramiko.SSHException) as ex:
                self.log.warning("connecting to %r [%s] failed: %r",
                                 instance, instance["ipv6"], ex)
            else:
                if data:
                    # parse the first global IPv4 address out of the output
                    ipv4 = data.partition(" inet ")[2].partition("/")[0]
                else:
                    self.log.warning("no data received from: %r", instance)
            if not ipv4:
                failed.append(instance)
            else:
                self.log.info("Got address %r for %s", ipv4,
                              instance["vm_name"])
                instance['ipv4'] = ipv4
                addr = instance[instance['ipproto']]
                result[instance_id] = dict(host=addr,
                                           private=dict(ip=addr, dns=addr))
        if not failed:
            break
    # BUGFIX: format indices were transposed (delete {1}, cloning {2},
    # boot {0}), so every duration was reported under the wrong label.
    self.log.info(
        "instances ready: delete {0:.2f}s, cloning {1:.2f}s, boot {2:.2f}s"
        .format(cloning_started - delete_started,
                boot_started - cloning_started,
                time.time() - boot_started))
    for client in tunnels.itervalues():
        client.close()
    self.disconnect()
    return result