def reconcile_unsynced_disks(self, server_id, db_server, gnt_server):
    """Report (and optionally fix) disks that differ between DB and Ganeti.

    The DB side is the server's volumes ordered by id, leaving out deleted
    volumes and volumes that are in status "CREATING" with a creation time
    at or before ``self.event_time - BUILDING_NIC_TIMEOUT``.  The Ganeti
    side is ``gnt_server["disks"]`` as parsed by
    ``backend_mod.parse_instance_disks``, ordered by key.

    When the two sides differ the difference is logged and, if the
    ``fix_unsynced_disks`` option is set, a successful
    OP_INSTANCE_SET_PARAMS job carrying the Ganeti disks is simulated.
    """
    cutoff = self.event_time - BUILDING_NIC_TIMEOUT
    db_disks = db_server.volumes.exclude(status="CREATING",
                                         created__lte=cutoff)
    db_disks = db_disks.filter(deleted=False).order_by("id")

    gnt_disks = gnt_server["disks"]
    gnt_disks_parsed = backend_mod.parse_instance_disks(gnt_disks)

    # A different number of disks is already a mismatch; the pairwise
    # comparison below still runs, exactly as before.
    mismatch = len(db_disks) != len(gnt_disks)
    gnt_items = sorted(gnt_disks_parsed.items())
    for db_disk, (gnt_id, gnt_disk) in zip(db_disks, gnt_items):
        if (db_disk.id != gnt_id or
                not backend_mod.disks_are_equal(db_disk, gnt_disk)):
            mismatch = True
            break

    if not mismatch:
        return

    separator = "\n\t\t"
    db_disks_str = separator.join(format_db_disk(d) for d in db_disks)
    gnt_disks_str = separator.join(
        format_gnt_disk(item) for item in sorted(gnt_disks_parsed.items()))
    self.log.info("Found unsynced disks for server '%s'.\n"
                  "\tDB:\n\t\t%s\n\tGaneti:\n\t\t%s",
                  server_id, db_disks_str, gnt_disks_str)

    if self.options["fix_unsynced_disks"]:
        locked_vm = get_locked_server(server_id)
        backend_mod.process_op_status(
            vm=locked_vm,
            etime=self.event_time,
            jobid=-0,
            opcode="OP_INSTANCE_SET_PARAMS",
            status='success',
            logmsg="Reconciliation: simulated Ganeti event",
            disks=gnt_disks)
def reconcile_unsynced_disks(self, server_id, db_server, gnt_server,
                             atomic_context=None):
    """Report (and optionally fix) disks that differ between DB and Ganeti.

    DB volumes are taken in id order, without deleted volumes and without
    volumes in status "CREATING" whose creation time is at or before
    ``self.event_time - BUILDING_NIC_TIMEOUT``.  They are compared pairwise
    with the parsed Ganeti disks sorted by key.

    On a mismatch the two sides are logged; if the ``fix_unsynced_disks``
    option is set, a successful OP_INSTANCE_SET_PARAMS job with the Ganeti
    disks is simulated, forwarding ``atomic_context`` unchanged.
    """
    cutoff = self.event_time - BUILDING_NIC_TIMEOUT
    db_disks = db_server.volumes.exclude(status="CREATING",
                                         created__lte=cutoff)
    db_disks = db_disks.filter(deleted=False).order_by("id")

    gnt_disks = gnt_server["disks"]
    gnt_disks_parsed = backend_mod.parse_instance_disks(gnt_disks)

    count_differs = len(db_disks) != len(gnt_disks)
    # Evaluated unconditionally so the same comparisons run as in a plain
    # loop with an early break.
    pair_differs = any(
        db_disk.id != gnt_id
        or not backend_mod.disks_are_equal(db_disk, gnt_disk)
        for db_disk, (gnt_id, gnt_disk)
        in zip(db_disks, sorted(gnt_disks_parsed.items())))

    if not (count_differs or pair_differs):
        return

    db_lines = [format_db_disk(disk) for disk in db_disks]
    gnt_lines = [format_gnt_disk(item)
                 for item in sorted(gnt_disks_parsed.items())]
    template = ("Found unsynced disks for server '%s'.\n"
                "\tDB:\n\t\t%s\n\tGaneti:\n\t\t%s")
    self.log.info(template, server_id,
                  "\n\t\t".join(db_lines), "\n\t\t".join(gnt_lines))

    if not self.options["fix_unsynced_disks"]:
        return

    locked_vm = get_locked_server(server_id)
    backend_mod.process_op_status(
        vm=locked_vm,
        etime=self.event_time,
        jobid=-0,
        opcode="OP_INSTANCE_SET_PARAMS",
        status='success',
        logmsg="Reconciliation: simulated Ganeti event",
        disks=gnt_disks,
        atomic_context=atomic_context)
def update_db(vm, msg, event_time):
    """Process a notification of type 'ganeti-op-status'.

    Messages of any other type are logged as errors and ignored.

    A failed OP_INSTANCE_CREATE that was submitted with opportunistic
    locking and whose error code is ``rapi.ECODE_TEMP_NORES`` is not
    forwarded to ``backend_mod.process_op_status``.  Instead the job is
    resubmitted without opportunistic locking and the VM is updated with
    the new job id.  Every other message is passed on to
    ``backend_mod.process_op_status``.

    :param vm: the VirtualMachine the message refers to
    :param msg: the decoded message (dict)
    :param event_time: time of the event, forwarded as-is
    """
    log.debug("Processing ganeti-op-status msg: %s", msg)

    if msg['type'] != "ganeti-op-status":
        log.error("Message is of unknown type %s.", msg['type'])
        return

    operation = msg["operation"]
    status = msg["status"]
    jobID = msg["jobId"]
    logmsg = msg["logmsg"]
    nics = msg.get("instance_nics", None)
    disks = msg.get("instance_disks", None)
    job_fields = msg.get("job_fields", {})
    result = msg.get("result", [])

    # Special case: OP_INSTANCE_CREATE with opportunistic locking may fail
    # if all Ganeti nodes are already locked. Retry the job without
    # opportunistic locking.
    if (operation == "OP_INSTANCE_CREATE" and status == "error" and
            job_fields.get("opportunistic_locking", False)):
        try:
            error_code = result[1][1]
        except (IndexError, TypeError):
            # Too short, or not subscriptable at all (e.g. a null result):
            # there is no error code to inspect.
            error_code = None
        if error_code == rapi.ECODE_TEMP_NORES:
            if vm.backendjobid != jobID:
                # The job has already been retried!
                return
            # Remove extra fields
            for field in ("OP_ID", "reason"):
                job_fields.pop(field, None)
            # Remove 'pnode' and 'snode' if they were set by Ganeti
            # iallocator. Ganeti will fail if both allocator and nodes are
            # specified.
            allocator = job_fields.pop("iallocator", None)
            if allocator is not None:
                for field in ("pnode", "snode"):
                    job_fields.pop(field, None)
            # Both keys are removed; "name" wins when both are present.
            name = job_fields.pop("name",
                                  job_fields.pop("instance_name", None))
            # Turn off opportunistic locking before retrying the job
            job_fields["opportunistic_locking"] = False
            with pooled_rapi_client(vm) as c:
                jobID = c.CreateInstance(name=name, **job_fields)
            # Update the VM fields
            vm.backendjobid = jobID
            # Update the task_job_id for commissions
            vm.task_job_id = jobID
            vm.backendjobstatus = None
            vm.save()
            log.info("Retrying failed creation of instance '%s' without"
                     " opportunistic locking. New job ID: '%s'", name, jobID)
            return

    backend_mod.process_op_status(vm, event_time, jobID,
                                  operation, status,
                                  logmsg, nics=nics,
                                  disks=disks,
                                  job_fields=job_fields)

    log.debug("Done processing ganeti-op-status msg for vm %s.",
              msg['instance'])
def reconcile_unsynced_operstate(self, server_id, db_server, gnt_server,
                                 atomic_context=None):
    """Report (and optionally fix) a DB operstate that differs from Ganeti.

    Nothing happens when ``db_server.operstate`` equals
    ``gnt_server["state"]``.  Otherwise the difference is logged and, if
    the ``fix_unsynced`` option is set, successful Ganeti jobs are
    simulated: first OP_INSTANCE_CREATE when the DB state is "BUILD", then
    OP_INSTANCE_STARTUP or OP_INSTANCE_SHUTDOWN depending on whether the
    Ganeti state is "STARTED".  ``atomic_context`` is forwarded unchanged.
    """
    db_state = db_server.operstate
    gnt_state = gnt_server["state"]
    if db_state == gnt_state:
        return

    self.log.info("Server '%s' is '%s' in DB and '%s' in Ganeti.",
                  server_id, db_state, gnt_state)
    if not self.options["fix_unsynced"]:
        return

    vm = get_locked_server(server_id)

    opcodes = []
    # If server is in building state, its creation has to be reconciled
    # first, to avoid wrong quotas.
    if db_state == "BUILD":
        opcodes.append("OP_INSTANCE_CREATE")
    if gnt_state == "STARTED":
        opcodes.append("OP_INSTANCE_STARTUP")
    else:
        opcodes.append("OP_INSTANCE_SHUTDOWN")

    for opcode in opcodes:
        backend_mod.process_op_status(
            vm=vm,
            etime=self.event_time,
            jobid=-0,
            opcode=opcode,
            status='success',
            logmsg='Reconciliation: simulated Ganeti event',
            atomic_context=atomic_context)

    self.log.debug("Simulated Ganeti state event for server '%s'",
                   server_id)
def reconcile_unsynced_operstate(self, server_id, db_server, gnt_server):
    """Report (and optionally fix) a DB operstate that differs from Ganeti.

    Returns without doing anything when ``db_server.operstate`` equals
    ``gnt_server["state"]``.  Otherwise logs the two states and, when the
    ``fix_unsynced`` option is set, simulates successful Ganeti jobs: an
    OP_INSTANCE_CREATE first if the DB state is "BUILD", followed by
    OP_INSTANCE_STARTUP (Ganeti state "STARTED") or OP_INSTANCE_SHUTDOWN
    (any other Ganeti state).
    """
    db_state = db_server.operstate
    gnt_state = gnt_server["state"]
    if db_state == gnt_state:
        return

    self.log.info("Server '%s' is '%s' in DB and '%s' in Ganeti.",
                  server_id, db_state, gnt_state)
    if not self.options["fix_unsynced"]:
        return

    vm = get_locked_server(server_id)

    def simulate_success(opcode):
        # One simulated, successful Ganeti job for the locked VM.
        backend_mod.process_op_status(
            vm=vm, etime=self.event_time, jobid=-0,
            opcode=opcode, status='success',
            logmsg='Reconciliation: simulated Ganeti event')

    # A server still in building state must have its creation reconciled
    # first, to avoid wrong quotas.
    if db_state == "BUILD":
        simulate_success("OP_INSTANCE_CREATE")

    state_opcodes = {True: "OP_INSTANCE_STARTUP",
                     False: "OP_INSTANCE_SHUTDOWN"}
    simulate_success(state_opcodes[gnt_state == "STARTED"])

    self.log.debug("Simulated Ganeti state event for server '%s'",
                   server_id)
def reconcile_unsynced_nics(self, server_id, db_server, gnt_server):
    """Report (and optionally fix) NICs that differ between DB and Ganeti.

    DB NICs are taken in id order, leaving out NICs in state "BUILD" whose
    creation time is at or before
    ``self.event_time - BUILDING_NIC_TIMEOUT``.  If the Ganeti NICs cannot
    be parsed because of an unknown network, a warning is logged and the
    server is skipped.

    On a mismatch both sides are logged; if the ``fix_unsynced_nics``
    option is set, a successful OP_INSTANCE_SET_PARAMS job carrying the
    Ganeti NICs is simulated.
    """
    cutoff = self.event_time - BUILDING_NIC_TIMEOUT
    db_nics = db_server.nics.exclude(state="BUILD", created__lte=cutoff)
    db_nics = db_nics.order_by("id")

    gnt_nics = gnt_server["nics"]
    try:
        gnt_nics_parsed = backend_mod.parse_instance_nics(gnt_nics)
    except Network.InvalidBackendIdError as e:
        self.log.warning("Server %s is connected to unknown network %s"
                         " Cannot reconcile server." % (server_id, str(e)))
        return

    # A different NIC count is already a mismatch; the pairwise comparison
    # below still runs, exactly as before.
    mismatch = len(db_nics) != len(gnt_nics)
    gnt_items = sorted(gnt_nics_parsed.items())
    for db_nic, (gnt_id, gnt_nic) in zip(db_nics, gnt_items):
        if (db_nic.id != gnt_id or
                not backend_mod.nics_are_equal(db_nic, gnt_nic)):
            mismatch = True
            break

    if not mismatch:
        return

    separator = "\n\t\t"
    db_nics_str = separator.join(format_db_nic(nic) for nic in db_nics)
    gnt_nics_str = separator.join(
        format_gnt_nic(item) for item in sorted(gnt_nics_parsed.items()))
    self.log.info("Found unsynced NICs for server '%s'.\n"
                  "\tDB:\n\t\t%s\n\tGaneti:\n\t\t%s",
                  server_id, db_nics_str, gnt_nics_str)

    if self.options["fix_unsynced_nics"]:
        locked_vm = get_locked_server(server_id)
        backend_mod.process_op_status(
            vm=locked_vm,
            etime=self.event_time,
            jobid=-0,
            opcode="OP_INSTANCE_SET_PARAMS",
            status='success',
            logmsg="Reconciliation: simulated Ganeti event",
            nics=gnt_nics)
def _reconcile(server_id, atomic_context=None):
    """Simulate a successful OP_INSTANCE_REMOVE job for one server.

    The server is fetched with ``get_locked_server`` and the simulated
    event is handed to ``backend_mod.process_op_status`` together with
    ``atomic_context``.

    NOTE(review): ``self`` is not a parameter; it is resolved from the
    surrounding scope, so this is presumably defined inside a method.
    """
    locked_vm = get_locked_server(server_id)
    simulated_event = {
        "vm": locked_vm,
        "etime": self.event_time,
        "jobid": -0,
        "opcode": 'OP_INSTANCE_REMOVE',
        "status": 'success',
        "logmsg": 'Reconciliation: simulated Ganeti event',
        "atomic_context": atomic_context,
    }
    backend_mod.process_op_status(**simulated_event)
def reconcile_unsynced_rescue(self, server_id, gnt_server, db_server):
    """Report (and optionally fix) a rescue state that differs from Ganeti.

    A Ganeti instance counts as being in rescue mode when its ``hvparams``
    have a ``boot_order`` starting with 'cdrom' and a non-empty
    ``cdrom_image_path``.  Servers without ``hvparams`` are skipped.

    The state is unsynced when:
      * Ganeti is in rescue mode but the DB server is not (or has no
        rescue image), or
      * both are in rescue mode but with different image locations, or
      * Ganeti is not in rescue mode while the DB server is flagged as
        rescued or still has a rescue image.

    Unsynced state is logged; if the ``fix_unsynced_rescue`` option is set,
    a successful OP_INSTANCE_SET_PARAMS job carrying the Ganeti
    ``boot_order`` and ``cdrom_image_path`` is simulated.
    """
    hvparams = gnt_server.get("hvparams")
    if hvparams is None:
        return

    cdrom_image_path = hvparams.get('cdrom_image_path', '')
    boot_order = hvparams.get('boot_order', '')

    if boot_order.startswith('cdrom') and len(cdrom_image_path) > 0:
        # Ganeti is in rescue mode; check the DB agrees, image included.
        if not db_server.rescue or db_server.rescue_image is None:
            changed = True
        else:
            location = db_server.rescue_image.location
            if (db_server.rescue_image.location_type ==
                    RescueImage.FILETYPE_FILE):
                # Ganeti always holds a full path for the image, while the
                # DB stores only the file name.
                location = os.path.join(settings.RESCUE_IMAGE_PATH,
                                        location)
            changed = location != cdrom_image_path
    else:
        # Ganeti is not in rescue mode; the DB must not claim otherwise.
        changed = bool(db_server.rescue or
                       db_server.rescue_image is not None)

    if not changed:
        return

    if db_server.rescue_image:
        db_location = db_server.rescue_image.location
    else:
        db_location = None
    self.log.info(
        "Found unsynced rescue state for server %s: "
        "VM rescue: %s, VM rescue image location: %s "
        "boot_order: %s, cdrom_image_path: %s",
        db_server.id, db_server.rescue, db_location,
        boot_order, cdrom_image_path)

    if not self.options["fix_unsynced_rescue"]:
        return

    hvparams = {
        'boot_order': boot_order,
        'cdrom_image_path': cdrom_image_path
    }
    vm = get_locked_server(server_id)
    backend_mod.process_op_status(
        vm=vm,
        etime=self.event_time,
        jobid=-0,
        opcode="OP_INSTANCE_SET_PARAMS",
        status='success',
        logmsg="Reconciliation: simulated Ganeti event",
        hvparams=hvparams)
def reconcile_unsynced_rescue(self, server_id, gnt_server, db_server):
    """Report (and optionally fix) a rescue state that differs from Ganeti.

    Servers whose Ganeti data has no ``hvparams`` are skipped.  Ganeti is
    considered to be in rescue mode when ``boot_order`` starts with
    'cdrom' and ``cdrom_image_path`` is non-empty.  The DB is then expected
    to flag the server as rescued with a rescue image at the same location;
    otherwise the DB is expected to have neither the flag nor an image.

    Any disagreement is logged; with the ``fix_unsynced_rescue`` option set
    a successful OP_INSTANCE_SET_PARAMS job carrying the Ganeti
    ``boot_order`` and ``cdrom_image_path`` is simulated.
    """
    hvparams = gnt_server.get("hvparams")
    if hvparams is None:
        return

    cdrom_image_path = hvparams.get('cdrom_image_path', '')
    boot_order = hvparams.get('boot_order', '')

    def db_image_location():
        # Location of the DB rescue image in the form Ganeti reports it:
        # Ganeti always holds a full path, the DB only the file name.
        image = db_server.rescue_image
        path = image.location
        if image.location_type == RescueImage.FILETYPE_FILE:
            path = os.path.join(settings.RESCUE_IMAGE_PATH, path)
        return path

    changed = False
    if boot_order.startswith('cdrom') and len(cdrom_image_path) > 0:
        # Ganeti is in rescue mode.
        if not db_server.rescue or db_server.rescue_image is None:
            changed = True
        elif db_image_location() != cdrom_image_path:
            changed = True
    elif db_server.rescue or db_server.rescue_image is not None:
        # Ganeti is not in rescue mode but the DB says it is.
        changed = True

    if changed:
        log_args = (
            db_server.id,
            db_server.rescue,
            db_server.rescue_image.location
            if db_server.rescue_image else None,
            boot_order,
            cdrom_image_path,
        )
        self.log.info("Found unsynced rescue state for server %s: "
                      "VM rescue: %s, VM rescue image location: %s "
                      "boot_order: %s, cdrom_image_path: %s", *log_args)
        if self.options["fix_unsynced_rescue"]:
            hvparams = {
                'boot_order': boot_order,
                'cdrom_image_path': cdrom_image_path
            }
            vm = get_locked_server(server_id)
            backend_mod.process_op_status(
                vm=vm,
                etime=self.event_time,
                jobid=-0,
                opcode="OP_INSTANCE_SET_PARAMS",
                status='success',
                logmsg="Reconciliation: simulated Ganeti event",
                hvparams=hvparams)
def reconcile_building_server(self, db_server):
    """Report (and optionally fix) a server that failed while building.

    Logs that the server is BUILD in the DB but 'ERROR' in Ganeti.  If the
    ``fix_unsynced`` option is set, a failed OP_INSTANCE_CREATE job is
    simulated for the server.
    """
    server_id = db_server.id
    self.log.info("Server '%s' is BUILD in DB, but 'ERROR' in Ganeti.",
                  server_id)
    if not self.options["fix_unsynced"]:
        return

    locked_vm = get_locked_server(server_id)
    backend_mod.process_op_status(
        vm=locked_vm,
        etime=self.event_time,
        jobid=-0,
        opcode="OP_INSTANCE_CREATE",
        status='error',
        logmsg='Reconciliation: simulated Ganeti event')
    self.log.debug("Simulated Ganeti error build event for"
                   " server '%s'", server_id)
def update_db(vm, msg, event_time):
    """Process a notification of type 'ganeti-op-status'.

    Messages of any other type are logged as errors and dropped.  For a
    valid message the job id, operation, status, log message and the
    optional "nics" entry are handed to ``backend.process_op_status``.
    """
    log.debug("Processing ganeti-op-status msg: %s", msg)

    msg_type = msg['type']
    if msg_type != "ganeti-op-status":
        log.error("Message is of unknown type %s.", msg_type)
        return

    nics = msg.get("nics", None)
    job_id = msg['jobId']
    operation = msg['operation']
    status = msg['status']
    logmsg = msg['logmsg']
    backend.process_op_status(vm, event_time, job_id, operation, status,
                              logmsg, nics)

    log.debug("Done processing ganeti-op-status msg for vm %s.",
              msg['instance'])
def reconcile_unsynced_flavor(self, server_id, db_server, gnt_server,
                              atomic_context=None):
    """Report (and optionally fix) a flavor that differs from Ganeti.

    The DB flavor's ram, cpu and disk are compared with the "ram", "vcpus"
    and "disk" entries of ``gnt_server["flavor"]``.  When they differ, the
    matching Flavor (same volume type as the DB flavor) is looked up; if
    none exists a warning is logged and the server is skipped.

    With the ``fix_unsynced_flavors`` option set, a successful
    OP_INSTANCE_SET_PARAMS job carrying the new beparams is simulated and
    the server's previous operstate is written back afterwards.
    """
    db_flavor = db_server.flavor
    gnt_flavor = gnt_server["flavor"]

    in_sync = (db_flavor.ram == gnt_flavor["ram"] and
               db_flavor.cpu == gnt_flavor["vcpus"] and
               db_flavor.disk == gnt_flavor["disk"])
    if in_sync:
        return

    try:
        gnt_flavor = Flavor.objects.get(
            ram=gnt_flavor["ram"],
            cpu=gnt_flavor["vcpus"],
            disk=gnt_flavor["disk"],
            volume_type_id=db_flavor.volume_type_id)
    except Flavor.DoesNotExist:
        self.log.warning("Server '%s' has unknown flavor.", server_id)
        return

    self.log.info(
        "Server '%s' has flavor '%s' in DB and '%s' in"
        " Ganeti", server_id, db_flavor, gnt_flavor)

    if not self.options["fix_unsynced_flavors"]:
        return

    vm = get_locked_server(server_id)
    previous_state = vm.operstate
    beparams = {
        "vcpus": gnt_flavor.cpu,
        "minmem": gnt_flavor.ram,
        "maxmem": gnt_flavor.ram
    }
    backend_mod.process_op_status(
        vm=vm,
        etime=self.event_time,
        jobid=-0,
        opcode="OP_INSTANCE_SET_PARAMS",
        status='success',
        job_fields={"beparams": beparams},
        logmsg='Reconciliation: simulated Ganeti event',
        atomic_context=atomic_context)

    # Per the original note, process_op_status with beparams sets the VM
    # state to shutdown; restore the state the server had before.
    vm = VirtualMachine.objects.get(pk=server_id)
    vm.operstate = previous_state
    vm.save()
    self.log.debug("Simulated Ganeti flavor event for server '%s'",
                   server_id)
def reconcile_building_server(self, db_server):
    """Report (and optionally fix) a server that failed while building.

    The mismatch (BUILD in the DB, 'ERROR' in Ganeti) is always logged.
    When the ``fix_unsynced`` option is set, a failed OP_INSTANCE_CREATE
    job is simulated for the server via ``backend_mod.process_op_status``.
    """
    self.log.info("Server '%s' is BUILD in DB, but 'ERROR' in Ganeti.",
                  db_server.id)

    fix_requested = self.options["fix_unsynced"]
    if fix_requested:
        simulated_event = dict(
            vm=get_locked_server(db_server.id),
            etime=self.event_time,
            jobid=-0,
            opcode="OP_INSTANCE_CREATE",
            status='error',
            logmsg='Reconciliation: simulated Ganeti event')
        backend_mod.process_op_status(**simulated_event)
        self.log.debug(
            "Simulated Ganeti error build event for"
            " server '%s'", db_server.id)
def reconcile_unsynced_nics(self, server_id, db_server, gnt_server,
                            atomic_context=None):
    """Report (and optionally fix) NICs that differ between DB and Ganeti.

    DB NICs are taken in id order, without NICs in state "BUILD" whose
    creation time is at or before
    ``self.event_time - BUILDING_NIC_TIMEOUT``, and compared pairwise with
    the parsed Ganeti NICs sorted by key.  An unknown network on the Ganeti
    side is logged as a warning and ends the reconciliation of this server.

    On a mismatch both sides are logged; if the ``fix_unsynced_nics``
    option is set, a successful OP_INSTANCE_SET_PARAMS job with the Ganeti
    NICs is simulated, forwarding ``atomic_context`` unchanged.
    """
    cutoff = self.event_time - BUILDING_NIC_TIMEOUT
    db_nics = db_server.nics.exclude(state="BUILD", created__lte=cutoff)
    db_nics = db_nics.order_by("id")

    gnt_nics = gnt_server["nics"]
    try:
        gnt_nics_parsed = backend_mod.parse_instance_nics(gnt_nics)
    except Network.InvalidBackendIdError as e:
        self.log.warning("Server %s is connected to unknown network %s"
                         " Cannot reconcile server." % (server_id, str(e)))
        return

    count_differs = len(db_nics) != len(gnt_nics)
    # Evaluated unconditionally so the same comparisons run as in a plain
    # loop with an early break.
    pair_differs = any(
        db_nic.id != gnt_id
        or not backend_mod.nics_are_equal(db_nic, gnt_nic)
        for db_nic, (gnt_id, gnt_nic)
        in zip(db_nics, sorted(gnt_nics_parsed.items())))

    if not (count_differs or pair_differs):
        return

    db_lines = [format_db_nic(nic) for nic in db_nics]
    gnt_lines = [format_gnt_nic(item)
                 for item in sorted(gnt_nics_parsed.items())]
    template = ("Found unsynced NICs for server '%s'.\n"
                "\tDB:\n\t\t%s\n\tGaneti:\n\t\t%s")
    self.log.info(template, server_id,
                  "\n\t\t".join(db_lines), "\n\t\t".join(gnt_lines))

    if not self.options["fix_unsynced_nics"]:
        return

    locked_vm = get_locked_server(server_id)
    backend_mod.process_op_status(
        vm=locked_vm,
        etime=self.event_time,
        jobid=-0,
        opcode="OP_INSTANCE_SET_PARAMS",
        status='success',
        logmsg="Reconciliation: simulated Ganeti event",
        nics=gnt_nics,
        atomic_context=atomic_context)
def reconcile_stale_servers(self):
    """Detect, report and optionally remove servers missing from Ganeti.

    Candidates are the server ids in ``self.db_servers_keys`` that are not
    in ``self.gnt_servers_keys``.  A candidate is stale unless:
      * it is in BUILD state and its build status is "ERROR" (handed to
        ``reconcile_building_server`` instead) or "RUNNING", or
      * it is in ERROR state and its action is not "DESTROY".

    Stale servers are logged; if the ``fix_stale`` option is set, a
    successful OP_INSTANCE_REMOVE job is simulated for each of them.
    """
    stale = []
    for server_id in self.db_servers_keys - self.gnt_servers_keys:
        db_server = self.db_servers[server_id]
        operstate = db_server.operstate

        if operstate == "BUILD":
            build_status, _ = self.get_build_status(db_server)
            if build_status == "ERROR":
                # Build errors get their own handling.
                self.reconcile_building_server(db_server)
                continue
            if build_status == "RUNNING":
                continue
        elif operstate == "ERROR" and db_server.action != "DESTROY":
            # Servers in ERROR are stale only if the user has asked to
            # destroy them.
            continue

        stale.append(server_id)

    if not stale:
        self.log.debug("No stale servers at backend %s", self.backend)
        return

    self.log.info("Found stale servers %s at backend %s",
                  ", ".join(str(server_id) for server_id in stale),
                  self.backend)

    if not self.options["fix_stale"]:
        return

    for server_id in stale:
        locked_vm = get_locked_server(server_id)
        backend_mod.process_op_status(
            vm=locked_vm,
            etime=self.event_time,
            jobid=-0,
            opcode='OP_INSTANCE_REMOVE',
            status='success',
            logmsg='Reconciliation: simulated Ganeti event')
    self.log.debug("Simulated Ganeti removal for stale servers.")
def reconcile_unsynced_flavor(self, server_id, db_server, gnt_server,
                              atomic_context=None):
    """Report (and optionally fix) a flavor that differs from Ganeti.

    Compares ram/cpu/disk of the DB flavor with the "ram"/"vcpus"/"disk"
    entries of ``gnt_server["flavor"]``.  On a difference the Flavor with
    the Ganeti values and the DB flavor's volume type is looked up; a
    missing Flavor is logged as a warning and nothing else is done.

    If the ``fix_unsynced_flavors`` option is set, a successful
    OP_INSTANCE_SET_PARAMS job with the matching beparams is simulated and
    the operstate the server had before is restored.
    """
    db_flavor = db_server.flavor
    reported = gnt_server["flavor"]

    def differs(db_value, key):
        return db_value != reported[key]

    if not (differs(db_flavor.ram, "ram") or
            differs(db_flavor.cpu, "vcpus") or
            differs(db_flavor.disk, "disk")):
        return

    lookup = dict(ram=reported["ram"],
                  cpu=reported["vcpus"],
                  disk=reported["disk"],
                  volume_type_id=db_flavor.volume_type_id)
    try:
        gnt_flavor = Flavor.objects.get(**lookup)
    except Flavor.DoesNotExist:
        self.log.warning("Server '%s' has unknown flavor.", server_id)
        return

    self.log.info("Server '%s' has flavor '%s' in DB and '%s' in"
                  " Ganeti", server_id, db_flavor, gnt_flavor)

    if self.options["fix_unsynced_flavors"]:
        vm = get_locked_server(server_id)
        state_before_fix = vm.operstate
        job_fields = {
            "beparams": {"vcpus": gnt_flavor.cpu,
                         "minmem": gnt_flavor.ram,
                         "maxmem": gnt_flavor.ram},
        }
        backend_mod.process_op_status(
            vm=vm, etime=self.event_time, jobid=-0,
            opcode="OP_INSTANCE_SET_PARAMS", status='success',
            job_fields=job_fields,
            logmsg='Reconciliation: simulated Ganeti event',
            atomic_context=atomic_context)
        # Per the original note, process_op_status with beparams sets the
        # VM state to shutdown; put back the state it had before.
        vm = VirtualMachine.objects.get(pk=server_id)
        vm.operstate = state_before_fix
        vm.save()
        self.log.debug("Simulated Ganeti flavor event for server '%s'",
                       server_id)
def console(vm, console_type):
    """Arrange for an out-of-band console of the given type for a VM.

    Asks vncauthproxy to forward a VNC connection, protected by a random
    password, to the instance's primary node and network port as reported
    by Ganeti, and returns the connection info for the caller:
    ``{'type', 'host', 'port', 'password'}``.

    Raises ``faults.BadRequest`` if the server is not started (in the DB or
    in Ganeti), ``faults.ServiceUnavailable`` if vncauthproxy reports an
    error or the VNC endpoint changed while forwarding was being set up,
    and ``Exception`` for a kvm instance with ``serial_console`` enabled.
    """
    log.info("Get console VM %s, type %s", vm, console_type)

    if vm.operstate != "STARTED":
        raise faults.BadRequest('Server not in ACTIVE state.')

    # RAPI GetInstanceConsole() returns endpoints to the vnc_bind_address,
    # a cluster-wide setting (0.0.0.0 or 127.0.0.1) that is of no use here
    # (see #783), so the console info is built directly from the instance
    # data instead.
    #
    # WARNING: this assumes VNC runs on port network_port on the
    # instance's primary node, which is probably hypervisor-specific.
    def vnc_endpoint(instance):
        return {"kind": "vnc",
                "host": instance["pnode"],
                "port": instance["network_port"]}

    with pooled_rapi_client(vm) as client:
        instance = client.GetInstance(vm.backend_vm_id)
    console_data = vnc_endpoint(instance)

    if (vm.backend.hypervisor == "kvm" and
            instance['hvparams']['serial_console']):
        raise Exception("hv parameter serial_console cannot be true")

    # Check that the instance is really running
    if not instance["oper_state"]:
        log.warning("VM '%s' is marked as '%s' in DB while DOWN in Ganeti",
                    vm.id, vm.operstate)
        # Instance is not running. Mock a shutdown job to sync DB
        backend.process_op_status(vm, etime=datetime.now(), jobid=0,
                                  opcode="OP_INSTANCE_SHUTDOWN",
                                  status="success",
                                  logmsg="Reconciliation simulated event")
        raise faults.BadRequest('Server not in ACTIVE state.')

    # Source port 0 lets vncauthproxy pick one.  The alternative would be
    # a static allocation, e.g. console_data['port'] - 1000.
    source_port = 0
    dest_host = console_data['host']
    dest_port = console_data['port']
    password = util.random_password()

    # The setting may be a single dict of options or, for backwards
    # compatibility handling, a list of such dicts to choose from.
    proxy_opts = settings.CYCLADES_VNCAUTHPROXY_OPTS
    if isinstance(proxy_opts, list):
        proxy_opts = choice(proxy_opts)

    fwd = request_vnc_forwarding(source_port, dest_host, dest_port,
                                 password, console_type=console_type,
                                 **proxy_opts)

    if fwd['status'] != "OK":
        log.error("vncauthproxy returned error status: '%s'" % fwd)
        raise faults.ServiceUnavailable('vncauthproxy returned error status')

    # Verify that the VNC server settings haven't changed
    with pooled_rapi_client(vm) as client:
        instance = client.GetInstance(vm.backend_vm_id)
    if vnc_endpoint(instance) != console_data:
        raise faults.ServiceUnavailable('VNC Server settings changed.')

    if 'proxy_address' in fwd:
        host = fwd['proxy_address']
    else:
        host = getfqdn()

    return {
        'type': console_type,
        'host': host,
        'port': fwd['source_port'],
        'password': password}
def console(vm, console_type):
    """Arrange for an out-of-band console of the given type for a VM.

    The VNC endpoint (primary node and network port) is read from Ganeti,
    vncauthproxy is asked to forward to it with a random password, and the
    resulting connection info is returned as a dict with the keys 'type',
    'host', 'port' and 'password'.

    Raises ``faults.BadRequest`` when the server is not started in the DB
    or is down in Ganeti, ``faults.ServiceUnavailable`` when vncauthproxy
    fails or the endpoint changed in the meantime, and ``Exception`` for a
    kvm instance with ``serial_console`` enabled.
    """
    log.info("Get console VM %s, type %s", vm, console_type)

    not_active = 'Server not in ACTIVE state.'
    if vm.operstate != "STARTED":
        raise faults.BadRequest(not_active)

    # RAPI GetInstanceConsole() is not used: it returns endpoints to the
    # cluster-wide vnc_bind_address (0.0.0.0 or 127.0.0.1), see #783.
    # The reply is constructed directly instead.
    #
    # WARNING: this assumes VNC runs on port network_port on the
    # instance's primary node, which is probably hypervisor-specific.
    def fetch_instance():
        with pooled_rapi_client(vm) as rapi_client:
            return rapi_client.GetInstance(vm.backend_vm_id)

    def endpoint_of(info):
        return {"kind": "vnc",
                "host": info["pnode"],
                "port": info["network_port"]}

    info = fetch_instance()
    console_data = endpoint_of(info)

    if vm.backend.hypervisor == "kvm" and info['hvparams']['serial_console']:
        raise Exception("hv parameter serial_console cannot be true")

    # Check that the instance is really running
    if not info["oper_state"]:
        log.warning("VM '%s' is marked as '%s' in DB while DOWN in Ganeti",
                    vm.id, vm.operstate)
        # Instance is not running. Mock a shutdown job to sync DB
        backend.process_op_status(vm, etime=datetime.now(), jobid=0,
                                  opcode="OP_INSTANCE_SHUTDOWN",
                                  status="success",
                                  logmsg="Reconciliation simulated event")
        raise faults.BadRequest(not_active)

    # Let vncauthproxy decide on the source port (0).  A static allocation
    # such as console_data['port'] - 1000 would be the alternative.
    sport = 0
    daddr, dport = console_data['host'], console_data['port']
    password = util.random_password()

    vnc_extra_opts = settings.CYCLADES_VNCAUTHPROXY_OPTS
    # A list of option dicts is accepted as well as a single dict.
    if isinstance(vnc_extra_opts, list):
        vnc_extra_opts = choice(vnc_extra_opts)

    fwd = request_vnc_forwarding(sport, daddr, dport, password,
                                 console_type=console_type,
                                 **vnc_extra_opts)

    if fwd['status'] != "OK":
        log.error("vncauthproxy returned error status: '%s'" % fwd)
        raise faults.ServiceUnavailable('vncauthproxy returned error status')

    # Verify that the VNC server settings haven't changed
    if endpoint_of(fetch_instance()) != console_data:
        raise faults.ServiceUnavailable('VNC Server settings changed.')

    try:
        proxy_host = fwd['proxy_address']
    except KeyError:
        proxy_host = getfqdn()

    connection_info = {}
    connection_info['type'] = console_type
    connection_info['host'] = proxy_host
    connection_info['port'] = fwd['source_port']
    connection_info['password'] = password
    return connection_info
def do_create_server(userid, name, password, flavor, image, metadata={},
                     personality=[], network=None, backend=None):
    """Create a server in the DB and enqueue its creation job in Ganeti.

    The work is done in two phases:

    1. DB phase: pick a backend and an address (unless given), create the
       VirtualMachine, its first NIC and its metadata, and issue and accept
       the quota commission.  Any exception rolls the transaction back and
       is re-raised; success commits.
    2. Ganeti phase: re-read the VM with ``select_for_update``, send the
       ``server_created`` signal, enqueue the job via ``create_instance``
       and store the job id on the VM.  Any exception here triggers a
       simulated successful OP_INSTANCE_REMOVE for the VM and is re-raised.

    ``flavor`` and ``image`` are modified in place when
    ``util.get_flavor_provider`` reports a provider.

    NOTE(review): ``metadata`` and ``personality`` have mutable defaults;
    this function only reads them.
    NOTE(review): the explicit commit/rollback calls presumably rely on a
    manual transaction-management decorator that is not visible here.

    :returns: the created VirtualMachine
    :raises faults.ServiceUnavailable: if no backend could be allocated
    """
    # Fix flavor for archipelago
    disk_template, provider = util.get_flavor_provider(flavor)
    if provider:
        flavor.disk_template = disk_template
        flavor.disk_provider = provider
        flavor.disk_origin = image['checksum']
        image['backend_id'] = 'null'
    else:
        flavor.disk_provider = None
        flavor.disk_origin = None

    try:
        if backend is None:
            # Allocate backend to host the server.
            backend_allocator = BackendAllocator()
            backend = backend_allocator.allocate(userid, flavor)
            if backend is None:
                log.error("No available backend for VM with flavor %s", flavor)
                raise faults.ServiceUnavailable("No available backends")

        if network is None:
            # Allocate IP from public network
            (network, address) = util.get_public_ip(backend)
        else:
            address = util.get_network_free_address(network)

        # We must save the VM instance now, so that it gets a valid
        # vm.backend_vm_id.
        vm = VirtualMachine.objects.create(
            name=name,
            backend=backend,
            userid=userid,
            imageid=image["id"],
            flavor=flavor,
            action="CREATE")

        log.info("Created entry in DB for VM '%s'", vm)

        # Create the VM's public NIC now. Do not wait for the notification
        # from the Ganeti hooks to create this NIC, because if the hooks
        # never run (e.g. building error) the VM's public IP address will
        # never be released!
        nic = NetworkInterface.objects.create(machine=vm, network=network,
                                              index=0, ipv4=address,
                                              state="BUILDING")

        # Also we must create the VM metadata in the same transaction.
        for key, val in metadata.items():
            VirtualMachineMetadata.objects.create(
                meta_key=key,
                meta_value=val,
                vm=vm)
        # Issue commission to Quotaholder and accept it since at the end of
        # this transaction the VirtualMachine object will be created in the
        # DB.
        # Note: the following call does a commit!
        quotas.issue_and_accept_commission(vm)
    except:
        # Undo everything from the DB phase, then let the caller see the
        # original exception.
        transaction.rollback()
        raise
    else:
        transaction.commit()

    try:
        vm = VirtualMachine.objects.select_for_update().get(id=vm.id)
        # dispatch server created signal needed to trigger the 'vmapi', which
        # enriches the vm object with the 'config_url' attribute which must be
        # passed to the Ganeti job.
        server_created.send(sender=vm, created_vm_params={
            'img_id': image['backend_id'],
            'img_passwd': password,
            'img_format': str(image['format']),
            'img_personality': json.dumps(personality),
            'img_properties': json.dumps(image['metadata']),
        })

        jobID = create_instance(vm, nic, flavor, image)
        # At this point the job is enqueued in the Ganeti backend
        vm.backendopcode = "OP_INSTANCE_CREATE"
        vm.backendjobid = jobID
        vm.save()
        transaction.commit()
        log.info("User %s created VM %s, NIC %s, Backend %s, JobID %s",
                 userid, vm, nic, backend, str(jobID))
    except:
        # If an exception is raised, then the user will never get the VM id.
        # In order to delete it from the DB and release its resources, we
        # mock a successful OP_INSTANCE_REMOVE job.
        process_op_status(vm=vm, etime=datetime.datetime.now(), jobid=-0,
                          opcode="OP_INSTANCE_REMOVE",
                          status="success",
                          logmsg="Reconciled eventd: VM creation failed.")
        raise

    return vm
def handle(self, **options): verbosity = int(options['verbosity']) self._process_args(options) backend_id = options['backend-id'] backend = get_backend(backend_id) if backend_id else None G, GNics = reconciliation.get_instances_from_ganeti(backend) D = reconciliation.get_servers_from_db(backend) DBNics = reconciliation.get_nics_from_db(backend) # # Detect problems # if options['detect_stale']: stale = reconciliation.stale_servers_in_db(D, G) if len(stale) > 0: print >> sys.stderr, "Found the following stale server IDs: " print " " + "\n ".join([str(x) for x in stale]) elif verbosity == 2: print >> sys.stderr, "Found no stale server IDs in DB." if options['detect_orphans']: orphans = reconciliation.orphan_instances_in_ganeti(D, G) if len(orphans) > 0: print >> sys.stderr, "Found orphan Ganeti instances with IDs: " print " " + "\n ".join([str(x) for x in orphans]) elif verbosity == 2: print >> sys.stderr, "Found no orphan Ganeti instances." if options['detect_unsynced']: unsynced = reconciliation.unsynced_operstate(D, G) if len(unsynced) > 0: print >> sys.stderr, "The operstate of the following server" \ " IDs is out-of-sync:" print " " + "\n ".join([ "%d is %s in DB, %s in Ganeti" % (x[0], x[1], ('UP' if x[2] else 'DOWN')) for x in unsynced ]) elif verbosity == 2: print >> sys.stderr, "The operstate of all servers is in sync." if options['detect_build_errors']: build_errors = reconciliation.instances_with_build_errors(D, G) if len(build_errors) > 0: msg = "The os for the following server IDs was not build"\ " successfully:" print >> sys.stderr, msg print " " + "\n ".join(["%d" % x for x in build_errors]) elif verbosity == 2: print >> sys.stderr, "Found no instances with build errors." 
if options['detect_unsynced_nics']: def pretty_print_nics(nics): if not nics: print ''.ljust(18) + 'None' for index, info in nics.items(): print ''.ljust(18) + 'nic/' + str(index) +\ ': MAC: %s, IP: %s, Network: %s' % \ (info['mac'], info['ipv4'], info['network']) unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics) if len(unsynced_nics) > 0: msg = "The NICs of the servers with the following IDs are"\ " unsynced:" print >> sys.stderr, msg for id, nics in unsynced_nics.items(): print ''.ljust(2) + '%6d:' % id print ''.ljust(8) + '%8s:' % 'DB' pretty_print_nics(nics[0]) print ''.ljust(8) + '%8s:' % 'Ganeti' pretty_print_nics(nics[1]) elif verbosity == 2: print >> sys.stderr, "All instance nics are synced." # # Then fix them # if options['fix_stale'] and len(stale) > 0: print >> sys.stderr, \ "Simulating successful Ganeti removal for %d " \ "servers in the DB:" % len(stale) for vm in VirtualMachine.objects.filter(pk__in=stale): event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode='OP_INSTANCE_REMOVE', status='success', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_orphans'] and len(orphans) > 0: print >> sys.stderr, \ "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \ len(orphans) for id in orphans: try: vm = VirtualMachine.objects.get(pk=id) with pooled_rapi_client(vm) as client: client.DeleteInstance(utils.id_to_instance_name(id)) except VirtualMachine.DoesNotExist: print >> sys.stderr, "No entry for VM %d in DB !!" 
% id print >> sys.stderr, " ...done" if options['fix_unsynced'] and len(unsynced) > 0: print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \ len(unsynced) for id, db_state, ganeti_up in unsynced: vm = VirtualMachine.objects.get(pk=id) opcode = "OP_INSTANCE_REBOOT" if ganeti_up \ else "OP_INSTANCE_SHUTDOWN" event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode=opcode, status='success', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_build_errors'] and len(build_errors) > 0: print >> sys.stderr, "Setting the state of %d build-errors VMs:" %\ len(build_errors) for id in build_errors: vm = VirtualMachine.objects.get(pk=id) event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode="OP_INSTANCE_CREATE", status='error', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_unsynced_nics'] and len(unsynced_nics) > 0: print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \ len(unsynced_nics) for id, nics in unsynced_nics.items(): vm = VirtualMachine.objects.get(pk=id) nics = nics[1] # Ganeti nics if nics == {}: # No nics vm.nics.all.delete() continue for index, nic in nics.items(): net_id = utils.id_from_network_name(nic['network']) subnet6 = Network.objects.get(id=net_id).subnet6 # Produce ipv6 ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None nic['ipv6'] = ipv6 # Rename ipv4 to ip nic['ip'] = nic['ipv4'] # Dict to sorted list final_nics = [] nics_keys = nics.keys() nics_keys.sort() for i in nics_keys: if nics[i]['network']: final_nics.append(nics[i]) else: print 'Network of nic %d of vm %s is None. ' \ 'Can not reconcile' % (i, vm.backend_vm_id) event_time = datetime.datetime.now() backend_mod.process_net_status(vm=vm, etime=event_time, nics=final_nics) print >> sys.stderr, " ...done"
def handle(self, **options): verbosity = int(options['verbosity']) self._process_args(options) backend_id = options['backend-id'] backend = get_backend(backend_id) if backend_id else None G, GNics = reconciliation.get_instances_from_ganeti(backend) D = reconciliation.get_servers_from_db(backend) DBNics = reconciliation.get_nics_from_db(backend) # # Detect problems # if options['detect_stale']: stale = reconciliation.stale_servers_in_db(D, G) if len(stale) > 0: print >> sys.stderr, "Found the following stale server IDs: " print " " + "\n ".join( [str(x) for x in stale]) elif verbosity == 2: print >> sys.stderr, "Found no stale server IDs in DB." if options['detect_orphans']: orphans = reconciliation.orphan_instances_in_ganeti(D, G) if len(orphans) > 0: print >> sys.stderr, "Found orphan Ganeti instances with IDs: " print " " + "\n ".join( [str(x) for x in orphans]) elif verbosity == 2: print >> sys.stderr, "Found no orphan Ganeti instances." if options['detect_unsynced']: unsynced = reconciliation.unsynced_operstate(D, G) if len(unsynced) > 0: print >> sys.stderr, "The operstate of the following server" \ " IDs is out-of-sync:" print " " + "\n ".join( ["%d is %s in DB, %s in Ganeti" % (x[0], x[1], ('UP' if x[2] else 'DOWN')) for x in unsynced]) elif verbosity == 2: print >> sys.stderr, "The operstate of all servers is in sync." if options['detect_build_errors']: build_errors = reconciliation.instances_with_build_errors(D, G) if len(build_errors) > 0: msg = "The os for the following server IDs was not build"\ " successfully:" print >> sys.stderr, msg print " " + "\n ".join( ["%d" % x for x in build_errors]) elif verbosity == 2: print >> sys.stderr, "Found no instances with build errors." 
if options['detect_unsynced_nics']: def pretty_print_nics(nics): if not nics: print ''.ljust(18) + 'None' for index, info in nics.items(): print ''.ljust(18) + 'nic/' + str(index) +\ ': MAC: %s, IP: %s, Network: %s' % \ (info['mac'], info['ipv4'], info['network']) unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics) if len(unsynced_nics) > 0: msg = "The NICs of the servers with the following IDs are"\ " unsynced:" print >> sys.stderr, msg for id, nics in unsynced_nics.items(): print ''.ljust(2) + '%6d:' % id print ''.ljust(8) + '%8s:' % 'DB' pretty_print_nics(nics[0]) print ''.ljust(8) + '%8s:' % 'Ganeti' pretty_print_nics(nics[1]) elif verbosity == 2: print >> sys.stderr, "All instance nics are synced." # # Then fix them # if options['fix_stale'] and len(stale) > 0: print >> sys.stderr, \ "Simulating successful Ganeti removal for %d " \ "servers in the DB:" % len(stale) for vm in VirtualMachine.objects.filter(pk__in=stale): event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode='OP_INSTANCE_REMOVE', status='success', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_orphans'] and len(orphans) > 0: print >> sys.stderr, \ "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \ len(orphans) for id in orphans: try: vm = VirtualMachine.objects.get(pk=id) with pooled_rapi_client(vm) as client: client.DeleteInstance(utils.id_to_instance_name(id)) except VirtualMachine.DoesNotExist: print >> sys.stderr, "No entry for VM %d in DB !!" 
% id print >> sys.stderr, " ...done" if options['fix_unsynced'] and len(unsynced) > 0: print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \ len(unsynced) for id, db_state, ganeti_up in unsynced: vm = VirtualMachine.objects.get(pk=id) opcode = "OP_INSTANCE_REBOOT" if ganeti_up \ else "OP_INSTANCE_SHUTDOWN" event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode=opcode, status='success', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_build_errors'] and len(build_errors) > 0: print >> sys.stderr, "Setting the state of %d build-errors VMs:" %\ len(build_errors) for id in build_errors: vm = VirtualMachine.objects.get(pk=id) event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode="OP_INSTANCE_CREATE", status='error', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_unsynced_nics'] and len(unsynced_nics) > 0: print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \ len(unsynced_nics) for id, nics in unsynced_nics.items(): vm = VirtualMachine.objects.get(pk=id) nics = nics[1] # Ganeti nics if nics == {}: # No nics vm.nics.all.delete() continue for index, nic in nics.items(): net_id = utils.id_from_network_name(nic['network']) subnet6 = Network.objects.get(id=net_id).subnet6 # Produce ipv6 ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None nic['ipv6'] = ipv6 # Rename ipv4 to ip nic['ip'] = nic['ipv4'] # Dict to sorted list final_nics = [] nics_keys = nics.keys() nics_keys.sort() for i in nics_keys: if nics[i]['network']: final_nics.append(nics[i]) else: print 'Network of nic %d of vm %s is None. ' \ 'Can not reconcile' % (i, vm.backend_vm_id) event_time = datetime.datetime.now() backend_mod.process_net_status(vm=vm, etime=event_time, nics=final_nics) print >> sys.stderr, " ...done"