Example #1
 def _run(self):
     t = None
     if self._memory_params:
         t = AbortSnapshot(self._vm, self._job_uuid, self._start_time,
                           self._timeout, self._abort, self._completed,
                           self._snapshot_job, self._lock)
         t.start()
     try:
         self._vm.log.info('Starting snapshot job')
         if self._recovery:
             LiveSnapshotRecovery(self._vm, self._abort, self._completed,
                                  self._snapshot_job, self._lock).run()
         else:
             snap = Snapshot(self._vm, self._snap_drives,
                             self._memory_params, self._frozen,
                             self._job_uuid, self._abort, self._completed,
                             self._start_time, self._timeout,
                             self._snapshot_job, self._lock,
                             self._freeze_timeout)
             snap.snapshot()
     except:
         # Set the abort flag in cases where the snapshot job failed before
         # starting the snapshot in libvirt, causing the AbortSnapshot thread
         # to finish. This is also safe for recovery, since it is saved to
         # the VM's metadata. The engine treats abort and failure the same.
         _set_abort(self._vm, self._snapshot_job, self._completed,
                    self._abort, self._lock)
         # We need to raise an exception in order to make the job framework
         # report the current job as a failure.
         raise exception.SnapshotFailed()
     finally:
         if self._memory_params:
             t.join()
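
The structure worth noting in this example: the abort/timeout watcher thread is started only when a memory snapshot is requested, any failure flags the job as aborted so the watcher can finish, and the watcher is always joined in the finally block. Below is a minimal, self-contained sketch of that pattern, assuming a hypothetical do_snapshot callable and a plain watchdog thread rather than VDSM's AbortSnapshot class:

import threading

def run_with_watchdog(do_snapshot, timeout=60.0):
    """Sketch of the _run() structure: start a watcher thread, flag an
    abort on failure, and always join the watcher. 'do_snapshot' is a
    hypothetical callable, not a VDSM API."""
    abort = threading.Event()
    done = threading.Event()

    def watchdog():
        # If the snapshot does not finish within the timeout,
        # request an abort; exit as soon as the job is done.
        if not done.wait(timeout):
            abort.set()

    t = threading.Thread(target=watchdog)
    t.start()
    try:
        do_snapshot(abort)  # the worker is expected to poll abort.is_set()
    except Exception:
        # Failure before or during the snapshot: flag the abort so any
        # observer treats the job as aborted, then re-raise so the job
        # framework reports a failure.
        abort.set()
        raise
    finally:
        done.set()
        t.join()
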
Example #2
    def snapshot(self):
        """Live snapshot command"""
        def norm_snap_drive_params(drive):
            """Normalize snapshot parameters"""

            if "baseVolumeID" in drive:
                base_drv = {
                    "device": "disk",
                    "domainID": drive["domainID"],
                    "imageID": drive["imageID"],
                    "volumeID": drive["baseVolumeID"]
                }
                target_drv = base_drv.copy()
                target_drv["volumeID"] = drive["volumeID"]

            elif "baseGUID" in drive:
                base_drv = {"GUID": drive["baseGUID"]}
                target_drv = {"GUID": drive["GUID"]}

            elif "baseUUID" in drive:
                base_drv = {"UUID": drive["baseUUID"]}
                target_drv = {"UUID": drive["UUID"]}

            else:
                base_drv, target_drv = (None, None)

            return base_drv, target_drv

        def rollback_drives(new_drives):
            """Rollback the prepared volumes for the snapshot"""

            for vm_dev_name, drive in new_drives.items():
                try:
                    self._vm.cif.teardownVolumePath(drive)
                except Exception:
                    self._vm.log.exception("Unable to teardown drive: %s",
                                           vm_dev_name)

        def memory_snapshot(memory_volume_path):
            """Libvirt snapshot XML"""

            return vmxml.Element('memory',
                                 snapshot='external',
                                 file=memory_volume_path)

        def vm_conf_for_memory_snapshot():
            """Returns the needed vm configuration with the memory snapshot"""

            return {
                'restoreFromSnapshot': True,
                '_srcDomXML': self._vm.migratable_domain_xml(),
                'elapsedTimeOffset': time.time() - self._vm.start_time
            }

        snap = vmxml.Element('domainsnapshot')
        disks = vmxml.Element('disks')
        new_drives = {}
        vm_drives = {}

        for drive in self._snap_drives:
            base_drv, tget_drv = norm_snap_drive_params(drive)

            try:
                self._vm.findDriveByUUIDs(tget_drv)
            except LookupError:
                # The VM is not already using the requested volume for
                # the snapshot, so proceed with this drive.
                pass
            else:
                # The snapshot volume is the current one, skipping
                self._vm.log.debug("The volume is already in use: %s",
                                   tget_drv)
                continue  # Next drive

            try:
                vm_drive = self._vm.findDriveByUUIDs(base_drv)
            except LookupError:
                # The volume we want to snapshot doesn't exist
                self._vm.log.error("The base volume doesn't exist: %s",
                                   base_drv)
                raise exception.SnapshotFailed()

            if vm_drive.hasVolumeLeases:
                self._vm.log.error('disk %s has volume leases', vm_drive.name)
                raise exception.SnapshotFailed()

            if vm_drive.transientDisk:
                self._vm.log.error('disk %s is a transient disk',
                                   vm_drive.name)
                raise exception.SnapshotFailed()

            vm_dev_name = vm_drive.name

            new_drives[vm_dev_name] = tget_drv.copy()
            new_drives[vm_dev_name]["type"] = "disk"
            new_drives[vm_dev_name]["diskType"] = vm_drive.diskType
            new_drives[vm_dev_name]["poolID"] = vm_drive.poolID
            new_drives[vm_dev_name]["name"] = vm_dev_name
            new_drives[vm_dev_name]["format"] = "cow"

            # We need to keep track of the drive object because it holds the
            # original data and is used to generate the snapshot element.
            # We keep the old volume ID so we can clear the block threshold.
            vm_drives[vm_dev_name] = (vm_drive, base_drv["volumeID"])

        prepared_drives = {}

        for vm_dev_name, vm_device in new_drives.items():
            # Add the device before requesting to prepare it, as we want to
            # be sure to tear it down even if prepareVolumePath fails for
            # some unknown reason that leaves the volume active.
            prepared_drives[vm_dev_name] = vm_device
            try:
                new_drives[vm_dev_name]["path"] = \
                    self._vm.cif.prepareVolumePath(new_drives[vm_dev_name])
            except Exception:
                self._vm.log.exception(
                    'unable to prepare the volume path for '
                    'disk %s', vm_dev_name)
                rollback_drives(prepared_drives)
                raise exception.SnapshotFailed()

            drive, _ = vm_drives[vm_dev_name]
            snapelem = drive.get_snapshot_xml(vm_device)
            disks.appendChild(snapelem)

        snap.appendChild(disks)

        snap_flags = (libvirt.VIR_DOMAIN_SNAPSHOT_CREATE_REUSE_EXT
                      | libvirt.VIR_DOMAIN_SNAPSHOT_CREATE_NO_METADATA)

        if self._memory_params:
            # Save the needed VM configuration.
            # TODO: this, like other places that pickle.dump directly to
            # files, should be done with outOfProcess
            vm_conf_vol = self._memory_params['dstparams']
            vm_conf_vol_path = self._vm.cif.prepareVolumePath(vm_conf_vol)
            try:
                with open(vm_conf_vol_path, "rb+") as f:
                    vm_conf = vm_conf_for_memory_snapshot()
                    # protocol=2 is needed for clusters < 4.4
                    # (for Python 2 host compatibility)
                    data = pickle.dumps(vm_conf, protocol=2)

                    # Ensure that the volume is aligned; qemu-img may segfault
                    # when converting unaligned images.
                    # https://bugzilla.redhat.com/1649788
                    aligned_length = utils.round(len(data), 4096)
                    data = data.ljust(aligned_length, b"\0")

                    f.write(data)
                    f.flush()
                    os.fsync(f.fileno())
            finally:
                self._vm.cif.teardownVolumePath(vm_conf_vol)

            # Adding the memory volume to the snapshot xml
            memory_vol = self._memory_params['dst']
            memory_vol_path = self._vm.cif.prepareVolumePath(memory_vol)
            snap.appendChild(memory_snapshot(memory_vol_path))
        else:
            memory_vol = memory_vol_path = None
            snap_flags |= libvirt.VIR_DOMAIN_SNAPSHOT_CREATE_DISK_ONLY

        snapxml = xmlutils.tostring(snap)
        # TODO: this is debug information. For 3.6.x we still need to
        # see the XML even with 'info' as the default level.
        self._vm.log.info("%s", snapxml)

        self._snapshot_job['memoryVolPath'] = memory_vol_path
        self._snapshot_job['memoryVol'] = memory_vol
        self._snapshot_job['newDrives'] = new_drives
        vm_drives_serialized = {}
        for k, v in vm_drives.items():
            vm_drives_serialized[k] = [xmlutils.tostring(v[0].getXML()), v[1]]
        self._snapshot_job['vmDrives'] = vm_drives_serialized
        _write_snapshot_md(self._vm, self._snapshot_job, self._lock)

        # We need to stop the volume monitor for two reasons: to prevent
        # spurious libvirt errors about missing drive paths (since we're
        # changing them), and to avoid triggering a drive extension for the
        # new volume with the apparent size of the old one (apparentsize is
        # updated as the last step in updateDriveParameters).
        self._vm.volume_monitor.disable()

        try:
            if self._should_freeze:
                self._vm.freeze()
            if not self._memory_params:
                run_time = _running_time(self._start_time)
                if run_time > self._freeze_timeout:
                    self._vm.log.error(
                        "Non-memory snapshot timeout %s passed after %s "
                        "seconds", self._freeze_timeout, run_time)
                    raise exception.SnapshotFailed()

            self._vm.log.info(
                "Taking a live snapshot (drives=%s,"
                "memory=%s)",
                ', '.join(drive["name"] for drive in new_drives.values()),
                self._memory_params is not None)
            try:
                self._vm.run_dom_snapshot(snapxml, snap_flags)
            except libvirt.libvirtError as e:
                if e.get_error_code() == libvirt.VIR_ERR_OPERATION_ABORTED:
                    self_abort = self._abort.is_set()
                    with self._lock:
                        self._abort.set()
                        self._snapshot_job['abort'] = self._abort.is_set()
                    _set_abort(self._vm, self._snapshot_job, self._completed,
                               self._abort, self._lock)
                    if self_abort:
                        self._vm.log.info("Snapshot timeout reached,"
                                          " operation aborted")
                    else:
                        self._vm.log.warning(
                            "Snapshot operation"
                            " aborted by libvirt: %s", e.get_error_message())
                self._vm.log.exception("Unable to take snapshot")
                if self._abort.is_set():
                    # This will cause a jump into finalize_vm. The abort
                    # flag is set and finalize_vm will raise an ActionStopped
                    # exception as well. This is an indicator to the Jobs
                    # framework signaling a client abort of the job.
                    raise exception.ActionStopped()
                self._thaw_vm()
                raise exception.SnapshotFailed()
            _set_completed(self._vm, self._snapshot_job, self._completed,
                           self._abort, self._lock)
            if self._completed.is_set():
                _write_snapshot_md(self._vm, self._snapshot_job, self._lock)
                self._vm.log.info("Completed live snapshot")
        except:
            # In case the VM was shut down in the middle of the snapshot
            # operation, we still finalize and report the failure. Or, when
            # the job was aborted, finalize_vm will raise an ActionStopped
            # exception to signal that it was aborted by the user (VDSM).
            self.finalize_vm(memory_vol)
            res = False
        else:
            res = self.teardown(memory_vol_path, memory_vol, new_drives,
                                vm_drives)
        if not res:
            raise RuntimeError("Failed to execute snapshot, "
                               "considering the operation as failure")
Example #3
 'unexpected': exception.UnexpectedError().response(),
 'unsupFormat': exception.UnsupportedImageFormat().response(),
 'ticketErr': exception.SpiceTicketError().response(),
 'nonresp': exception.NonResponsiveGuestAgent().response(),
 # codes 20-35 are reserved for add/delNetwork
 # code 39 was used for:
 # wrongHost - migration destination has an invalid hostname
 'unavail': exception.ResourceUnavailable().response(),
 'changeDisk': exception.ChangeDiskFailed().response(),
 'destroyErr': exception.VMDestroyFailed().response(),
 'fenceAgent': exception.UnsupportedFenceAgent().response(),
 'noimpl': exception.MethodNotImplemented().response(),
 'hotplugDisk': exception.HotplugDiskFailed().response(),
 'hotunplugDisk': exception.HotunplugDiskFailed().response(),
 'migCancelErr': exception.MigrationCancelationFailed().response(),
 'snapshotErr': exception.SnapshotFailed().response(),
 'hotplugNic': exception.HotplugNicFailed().response(),
 'hotunplugNic': exception.HotunplugNicFailed().response(),
 'migInProgress': exception.MigrationInProgress().response(),
 'mergeErr': exception.MergeFailed().response(),
 'balloonErr': exception.BalloonError().response(),
 'momErr': exception.MOMPolicyUpdateFailed().response(),
 'replicaErr': exception.ReplicaError().response(),
 'updateDevice': exception.UpdateDeviceFailed().response(),
 'hwInfoErr': exception.CannotRetrieveHWInfo().response(),
 'resizeErr': exception.BadDiskResizeParameter().response(),
 'transientErr': exception.TransientError().response(),
 'setNumberOfCpusErr': exception.SetNumberOfCpusFailed().response(),
 'haErr': exception.SetHAPolicyFailed().response(),
 'cpuTuneErr': exception.CpuTuneError().response(),
 'updateVmPolicyErr': exception.UpdateVMPolicyFailed().response(),
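
This table maps legacy string error codes to the response dictionaries produced by each exception's response() method. Below is a minimal sketch of that pattern, using placeholder class names and code values rather than the actual vdsm.common.exception definitions:

class ExampleVdsmError(Exception):
    """Placeholder base class; real VDSM exceptions carry similar fields."""
    code = 100
    message = "General Exception"

    def response(self):
        # Wrap the numeric code and message in the legacy status envelope.
        return {'status': {'code': self.code, 'message': self.message}}


class ExampleSnapshotFailed(ExampleVdsmError):
    code = 48  # placeholder value for illustration
    message = "Snapshot failed"


# The table above is built the same way: key -> prebuilt response dict.
errCode = {
    'snapshotErr': ExampleSnapshotFailed().response(),
}

print(errCode['snapshotErr'])
# {'status': {'code': 48, 'message': 'Snapshot failed'}}
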