def main(profile, dry_run, router):
    """Live migrate ROUTER"""

    click_log.basic_config()

    log_to_slack = True
    logging.task = 'Live Migrate HV to new POD'
    logging.slack_title = 'Domain'

    if dry_run:
        log_to_slack = False
        logging.warning('Running in dry-run mode, will only show changes')

    co = CosmicOps(profile=profile, dry_run=dry_run, log_to_slack=log_to_slack)

    router = co.get_system_vm(name=router)
    if not router:
        sys.exit(1)

    source_host = co.get_host(id=router['hostid'])
    if not source_host:
        sys.exit(1)

    cluster = co.get_cluster(id=source_host['clusterid'])
    if not cluster:
        sys.exit(1)

    destination_host = cluster.find_migration_host(router)
    if not destination_host:
        sys.exit(1)

    if not router.migrate(destination_host):
        sys.exit(1)

def main(profile, destination_dc, dry_run, host, cluster):
    """Migrate all VMs on HOST to CLUSTER"""

    click_log.basic_config()

    log_to_slack = True
    logging.task = 'Live Migrate HV to new POD'
    logging.slack_title = 'Domain'

    if dry_run:
        log_to_slack = False
        logging.warning('Running in dry-run mode, will only show changes')

    co = CosmicOps(profile=profile, dry_run=dry_run, log_to_slack=log_to_slack)
    cs = CosmicSQL(server=profile, dry_run=dry_run)

    host = co.get_host(name=host)
    if not host:
        sys.exit(1)

    for vm in host.get_all_vms() + host.get_all_project_vms():
        live_migrate(co=co, cs=cs, cluster=cluster, vm_name=vm['name'],
                     destination_dc=destination_dc, add_affinity_group=None,
                     is_project_vm=None, zwps_to_cwps=None,
                     log_to_slack=log_to_slack, dry_run=dry_run)

def main(profile, dry_run, instance_id):
    """Kills all jobs related to INSTANCE_ID"""

    click_log.basic_config()

    if dry_run:
        logging.warning('Running in dry-run mode, will only show changes')

    kill_jobs(profile, dry_run, instance_id)

def reboot(self, action=RebootAction.REBOOT):
    reboot_or_halt = 'halt' if action == RebootAction.HALT else 'reboot'

    if self.dry_run:
        logging.info(
            f"Would {reboot_or_halt} host '{self['name']}' with action '{action}'")
        return True

    if self.execute('virsh list | grep running | wc -l').stdout.strip() != '0':
        logging.error(
            f"Host '{self['name']}' has running VMs, will not {reboot_or_halt}",
            self.log_to_slack)
        return False

    try:
        if action == RebootAction.REBOOT:
            logging.info(f"Rebooting '{self['name']}' in 60s", self.log_to_slack)
            self.execute('shutdown -r 1', sudo=True)
        elif action == RebootAction.HALT:
            logging.info(
                f"Halting '{self['name']}' in 60s, be sure to start it manually to continue the rolling reboot",
                self.log_to_slack)
            self.execute('shutdown -h 1', sudo=True)
        elif action == RebootAction.FORCE_RESET:
            logging.info(f"Force resetting '{self['name']}'", self.log_to_slack)
            self.execute('sync', sudo=True)
            self.execute('echo b > /proc/sysrq-trigger', sudo=True)
        elif action == RebootAction.UPGRADE_FIRMWARE:
            logging.info(
                f"Rebooting '{self['name']}' after firmware upgrade", self.log_to_slack)
            self.execute(
                "tmux new -d 'yes | sudo /usr/sbin/smartupdate upgrade && sudo reboot'",
                pty=True)
        elif action == RebootAction.PXE_REBOOT:
            logging.info(f"PXE Rebooting '{self['name']}' in 10s", self.log_to_slack)
            self.execute(
                "tmux new -d 'sleep 10 && sudo /usr/sbin/hp-reboot pxe'", pty=True)
        elif action == RebootAction.SKIP:
            logging.info(f"Skipping reboot for '{self['name']}'", self.log_to_slack)
    except Exception as e:
        logging.warning(
            f"Ignoring exception as it's likely related to the {reboot_or_halt}: {e}",
            self.log_to_slack)

    return True

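# reboot() above dispatches on RebootAction members that are not defined in this
# excerpt. A minimal sketch of what that enum is assumed to look like, derived only
# from the members referenced above; the actual definition in the repository may differ.
from enum import Enum, auto


class RebootAction(Enum):
    REBOOT = auto()
    HALT = auto()
    FORCE_RESET = auto()
    UPGRADE_FIRMWARE = auto()
    PXE_REBOOT = auto()
    SKIP = auto()
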
def main(profile, zwps_to_cwps, add_affinity_group, destination_dc, is_project_vm,
         skip_within_cluster, dry_run, vm, cluster):
    """Live migrate VM to CLUSTER"""

    click_log.basic_config()

    log_to_slack = True
    logging.task = 'Live Migrate VM'
    logging.slack_title = 'Domain'

    if dry_run:
        log_to_slack = False
        logging.warning('Running in dry-run mode, will only show changes')

    co = CosmicOps(profile=profile, dry_run=dry_run, log_to_slack=log_to_slack)
    cs = CosmicSQL(server=profile, dry_run=dry_run)

    # Work around migration issue: first migrate within the same pod to limit possible hiccups
    vm_instance = co.get_vm(name=vm, is_project_vm=is_project_vm)

    if not vm_instance:
        logging.error(f"Cannot migrate, VM '{vm}' not found!")
        sys.exit(1)

    if not vm_instance['state'] == 'Running':
        logging.error(f"Cannot migrate, VM has state: '{vm_instance['state']}'")
        sys.exit(1)

    source_host = co.get_host(id=vm_instance['hostid'])
    source_cluster = co.get_cluster(id=source_host['clusterid'])
    if not skip_within_cluster:
        if not vm_instance.migrate_within_cluster(vm=vm_instance, source_cluster=source_cluster,
                                                  source_host=source_host, instancename=vm_instance):
            logging.info(f"VM Migration failed at {datetime.now().strftime('%d-%m-%Y %H:%M:%S')}\n")
            sys.exit(1)

    if not live_migrate(co, cs, cluster, vm, destination_dc, add_affinity_group, is_project_vm,
                        zwps_to_cwps, log_to_slack, dry_run):
        logging.info(f"VM Migration failed at {datetime.now().strftime('%d-%m-%Y %H:%M:%S')}\n")
        sys.exit(1)

    logging.info(f"VM Migration completed at {datetime.now().strftime('%d-%m-%Y %H:%M:%S')}\n")

def main(profile, uuid, network_uuid, dry_run, vpc):
    """VPC restart script"""

    click_log.basic_config()

    log_to_slack = True

    if uuid and network_uuid:
        logging.error('You can not specify --uuid and --network-uuid together')
        sys.exit(1)

    if dry_run:
        log_to_slack = False
        logging.warning('Running in dry-run mode, will only show changes')

    co = CosmicOps(profile=profile, dry_run=dry_run, log_to_slack=log_to_slack)

    if uuid:
        vpc = co.get_vpc(id=vpc)
    elif network_uuid:
        network = co.get_network(id=vpc)
        if not network:
            sys.exit(1)
        vpc = co.get_vpc(id=network['vpcid'])
    else:
        vpc = co.get_vpc(name=vpc)

    if not vpc:
        sys.exit(1)

    logging.slack_title = 'Domain'
    logging.slack_value = vpc['domain']
    logging.instance_name = vpc['name']
    logging.zone_name = vpc['zonename']

    if not vpc.restart():
        sys.exit(1)

    logging.info(
        f"Successfully restarted VPC '{vpc['name']}' ({vpc['id']}) with clean up",
        log_to_slack)

def main(profile, max_iops, zwps_to_cwps, is_project_vm, dry_run, vm, storage_pool):
    """Live migrate VM volumes to STORAGE_POOL"""

    click_log.basic_config()

    log_to_slack = True
    logging.task = 'Live Migrate VM Volumes'
    logging.slack_title = 'Domain'

    if dry_run:
        log_to_slack = False
        logging.warning('Running in dry-run mode, will only show changes')

    co = CosmicOps(profile=profile, dry_run=dry_run, log_to_slack=log_to_slack)
    cs = CosmicSQL(server=profile, dry_run=dry_run)

    if not live_migrate_volumes(storage_pool, co, cs, dry_run, is_project_vm,
                                log_to_slack, max_iops, vm, zwps_to_cwps):
        sys.exit(1)

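# The main() entry points in this collection read like click commands (they call
# click_log.basic_config() and take option-style parameters), but the decorators are
# not part of this excerpt. A hedged sketch of how the volume-migration entry point
# above might be wired up; the option names, defaults and help texts are assumptions,
# not the repository's actual CLI definition.
import click
import click_log


@click.command()
@click.option('--profile', '-p', default='config', help='Name of the CosmicOps profile to use')
@click.option('--max-iops', default=1000, show_default=True, help='IOPS limit to set during migration (0 disables it)')
@click.option('--zwps-to-cwps', is_flag=True, help='Convert ZWPS volumes to CWPS before migrating')
@click.option('--is-project-vm', is_flag=True, help='The VM belongs to a project')
@click.option('--dry-run/--exec', default=True, help='Only show what would happen')
@click.argument('vm')
@click.argument('storage_pool')
def migrate_vm_volumes(profile, max_iops, zwps_to_cwps, is_project_vm, dry_run, vm, storage_pool):
    """Live migrate VM volumes to STORAGE_POOL"""
    main(profile, max_iops, zwps_to_cwps, is_project_vm, dry_run, vm, storage_pool)
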
def main(profile, is_project_vm, dry_run, vm, cluster, destination_dc, destination_so):
    """Offline migrate VM to CLUSTER"""

    click_log.basic_config()

    log_to_slack = True
    logging.task = 'Offline Migrate VM'
    logging.slack_title = 'Domain'

    if dry_run:
        log_to_slack = False
        logging.warning('Running in dry-run mode, will only show changes')

    co = CosmicOps(profile=profile, dry_run=dry_run, log_to_slack=log_to_slack)
    cs = CosmicSQL(server=profile, dry_run=dry_run)

    target_cluster = co.get_cluster(name=cluster)
    if not target_cluster:
        sys.exit(1)

    vm = co.get_vm(name=vm, is_project_vm=is_project_vm)
    if not vm:
        sys.exit(1)

    if destination_dc and destination_dc not in DATACENTERS:
        logging.error(f"Unknown datacenter '{destination_dc}', should be one of {str(DATACENTERS)}")
        sys.exit(1)

    logging.instance_name = vm['instancename']
    logging.slack_value = vm['domain']
    logging.vm_name = vm['name']
    logging.zone_name = vm['zonename']

    target_storage_pool = None
    try:
        # Pick a CLUSTER-scoped storage pool (no NVMe or ZONE-wide pools)
        while target_storage_pool is None or target_storage_pool['scope'] != 'CLUSTER':
            target_storage_pool = choice(target_cluster.get_storage_pools())
    except IndexError:
        logging.error(f"No storage pools found for cluster '{target_cluster['name']}'")
        sys.exit(1)

    if vm['state'] == 'Running':
        need_to_stop = True
        auto_start_vm = True
    else:
        need_to_stop = False
        auto_start_vm = False

    if destination_dc:
        for datacenter in DATACENTERS:
            if datacenter == destination_dc:
                continue

            if datacenter in vm['serviceofferingname']:
                new_offering = vm['serviceofferingname'].replace(datacenter, destination_dc)
                logging.info(
                    f"Replacing '{vm['serviceofferingname']}' with '{new_offering}'")
                cs.update_service_offering_of_vm(vm['instancename'], new_offering)
                vm = co.get_vm(name=vm['instancename'], is_project_vm=is_project_vm)
                break

    if destination_so:
        logging.info(
            f"Replacing '{vm['serviceofferingname']}' with '{destination_so}'")
        cs.update_service_offering_of_vm(vm['instancename'], destination_so)
        vm = co.get_vm(name=vm['instancename'], is_project_vm=is_project_vm)

    vm_service_offering = co.get_service_offering(id=vm['serviceofferingid'])
    if vm_service_offering:
        storage_tags = vm_service_offering['tags'] if 'tags' in vm_service_offering else ''

        if not storage_tags:
            logging.warning('VM service offering has no storage tags')
        else:
            if storage_tags not in target_storage_pool['tags']:
                logging.error(
                    f"Can't migrate '{vm['name']}', storage tags on target cluster ({target_storage_pool['tags']}) do not contain the tags on the VM's service offering ({storage_tags})")
                sys.exit(1)

    if need_to_stop:
        if not vm.stop():
            sys.exit(1)

    volumes = vm.get_volumes()

    for volume in volumes:
        if volume['storage'] == target_storage_pool['name']:
            logging.warning(
                f"Volume '{volume['name']}' ({volume['id']}) already on cluster '{target_cluster['name']}', skipping...")
            continue

        source_storage_pool = co.get_storage_pool(name=volume['storage'])
        if not source_storage_pool:
            sys.exit(1)

        if source_storage_pool['scope'] == 'ZONE':
            logging.warning(f"Scope of volume '{volume['name']}' ({volume['id']}) is ZONE, skipping...")
            continue

        if not volume.migrate(target_storage_pool):
            sys.exit(1)

        with click_spinner.spinner():
            while True:
                volume.refresh()

                if volume['state'] == 'Ready':
                    break

                logging.warning(
                    f"Volume '{volume['name']}' ({volume['id']}) is in '{volume['state']}' state instead of 'Ready', sleeping...")
                time.sleep(60)

    if auto_start_vm:
        destination_host = target_cluster.find_migration_host(vm)
        if not destination_host:
            sys.exit(1)

        if not vm.start(destination_host):
            sys.exit(1)
    else:
        logging.info(f"Not starting VM '{vm['name']}' as it was not running", log_to_slack)

def live_migrate(co, cs, cluster, vm_name, destination_dc, add_affinity_group, is_project_vm,
                 zwps_to_cwps, log_to_slack, dry_run):
    if destination_dc and destination_dc not in DATACENTERS:
        logging.error(f"Unknown datacenter '{destination_dc}', should be one of {str(DATACENTERS)}")
        return False

    target_cluster = co.get_cluster(name=cluster)
    if not target_cluster:
        return False

    vm = co.get_vm(name=vm_name, is_project_vm=is_project_vm)
    if not vm:
        return False

    if not vm['state'] == 'Running':
        logging.error(f"Cannot migrate, VM has state: '{vm['state']}'")
        return False

    for vm_snapshot in vm.get_snapshots():
        logging.error(f"Cannot migrate, VM has VM snapshots: '{vm_snapshot['name']}'")
        return False

    if 'maintenancepolicy' in vm and vm['maintenancepolicy'] == 'ShutdownAndStart':
        logging.error(f"Cannot migrate, VM has maintenance policy: '{vm['maintenancepolicy']}'")
        return False

    logging.instance_name = vm['instancename']
    logging.slack_value = vm['domain']
    logging.vm_name = vm['name']
    logging.zone_name = vm['zonename']

    source_host = co.get_host(id=vm['hostid'])
    if not source_host:
        return False

    source_cluster = co.get_cluster(id=source_host['clusterid'])
    if not source_cluster:
        return False

    logging.cluster = source_cluster['name']
    if source_cluster['id'] == target_cluster['id']:
        logging.error(f"VM '{vm['name']}' is already running on cluster '{target_cluster['name']}'")
        return False

    if not dry_run:
        disk_info = source_host.get_disks(vm)
        for path, disk_data in disk_info.items():
            _, path, _, _, size = cs.get_volume_size(path)

            if int(size) != int(disk_data['size']):
                logging.warning(
                    f"Size for '{disk_data['path']}' in DB ({size}) is less than libvirt reports ({disk_data['size']}), updating DB")
                cs.update_volume_size(vm['instancename'], path, disk_data['size'])

    if zwps_to_cwps:
        if not dry_run:
            logging.info(f"Converting any ZWPS volume of VM '{vm['name']}' to CWPS before starting the migration",
                         log_to_slack=log_to_slack)
            if not cs.update_zwps_to_cwps('MCC_v1.CWPS', instance_name=vm['instancename']):
                logging.error(f"Failed to apply CWPS disk offering to VM '{vm['name']}'",
                              log_to_slack=log_to_slack)
                return False
        else:
            logging.info('Would have changed the diskoffering from ZWPS to CWPS of all ZWPS volumes')

    if destination_dc:
        for datacenter in DATACENTERS:
            if datacenter == destination_dc:
                continue

            if datacenter in vm['serviceofferingname']:
                logging.info(
                    f"Replacing '{datacenter}' with '{destination_dc}' in offering '{vm['serviceofferingname']}'")
                cs.update_service_offering_of_vm(vm['instancename'],
                                                 vm['serviceofferingname'].replace(datacenter, destination_dc))
                break

    zwps_found = False
    zwps_name = None
    root_disk = None
    cwps_found = False
    hwps_found = False
    data_disks_to_zwps = []
    zwps_disks_to_cwps = []
    for volume in vm.get_volumes():
        for snapshot in volume.get_snapshots():
            logging.error(f"Cannot migrate, volume '{volume['name']}' has snapshot: '{snapshot['name']}'")
            return False

        if volume['type'] == 'DATADISK':
            if volume['state'] != 'Ready':
                logging.error(f"Volume '{volume['name']}' has non-READY state '{volume['state']}', halting")
                return False

            source_storage_pool = co.get_storage_pool(name=volume['storage'])

            if source_storage_pool['scope'] == 'CLUSTER':
                cwps_found = True
                data_disks_to_zwps.append(volume)
            elif source_storage_pool['scope'] == 'ZONE':
                zwps_found = True
                zwps_name = volume['storage']
                if zwps_to_cwps:
                    zwps_disks_to_cwps.append(volume)
            elif source_storage_pool['scope'] == 'HOST':
                hwps_found = True
        elif volume['type'] == 'ROOT':
            root_disk = volume

    if hwps_found:
        logging.error(f"VM '{vm['name']}' has HWPS data disks attached. This is currently not handled by this script.",
                      log_to_slack=log_to_slack)
        return False

    if cwps_found and zwps_found:
        logging.info(
            f"VM '{vm['name']}' has both ZWPS and CWPS data disks attached. We are going to temporarily migrate all CWPS volumes to ZWPS.",
            log_to_slack=log_to_slack)
        for volume in data_disks_to_zwps:
            if not temp_migrate_volume(co=co, dry_run=dry_run, log_to_slack=log_to_slack, volume=volume,
                                       vm=vm, target_pool_name=zwps_name):
                logging.error(f"Volume '{volume['name']}' failed to migrate")
                return False

    if zwps_found:
        logging.info(f"ZWPS data disk attached to VM '{vm['name']}'")
        logging.info(
            f"For migration to succeed we need to migrate root disk '{root_disk['name']}' to ZWPS pool '{zwps_name}' first")

        if root_disk['storage'] == zwps_name:
            logging.warning(f"Volume '{root_disk['name']}' already on desired storage pool")
        else:
            if not temp_migrate_volume(co=co, dry_run=dry_run, log_to_slack=log_to_slack, volume=root_disk,
                                       vm=vm, target_pool_name=zwps_name):
                logging.error(f"Volume '{root_disk['name']}' failed to migrate")
                return False

    logging.info(f"ROOT disk is at storage pool: '{root_disk['storage']}'")

    destination_host = target_cluster.find_migration_host(vm)
    if not destination_host:
        return False

    if dry_run:
        if add_affinity_group:
            logging.info(
                f"Would have added affinity group '{add_affinity_group}' to VM '{vm['name']}'")
        logging.info(
            f"Would live migrate VM '{vm['name']}' to '{destination_host['name']}'")
        return True

    root_storage_pool = co.get_storage_pool(name=root_disk['storage'])
    if not root_storage_pool:
        logging.error(f"Unable to fetch storage pool details for ROOT disk '{root_disk['name']}'",
                      log_to_slack=log_to_slack)
        return False

    migrate_with_volume = root_storage_pool['scope'] != 'ZONE'

    if migrate_with_volume:
        for volume in vm.get_volumes():
            for target_pool in co.get_all_storage_pools(clusterid=target_cluster['id']):
                if not co.clean_old_disk_file(host=destination_host, dry_run=dry_run, volume=volume,
                                              target_pool_name=target_pool['name']):
                    logging.error(f"Cleaning volume '{volume['name']}' failed")
                    return False

    if not vm.migrate(destination_host, with_volume=migrate_with_volume,
                      source_host=source_host, instancename=vm['instancename']):
        return False

    with click_spinner.spinner():
        while True:
            vm.refresh()

            if vm['state'] == 'Running':
                break

            logging.warning(f"VM '{vm['name']}' is in '{vm['state']}' state instead of 'Running', sleeping...")
            time.sleep(60)

    if source_host['name'] == vm['hostname']:
        logging.error(
            f"VM '{vm['name']}' failed to migrate to '{destination_host['name']}' on cluster '{target_cluster['name']}'")
        return False
    else:
        if add_affinity_group:
            if not cs.add_vm_to_affinity_group(vm['instancename'], add_affinity_group):
                logging.error(
                    f"Failed to add affinity group '{add_affinity_group}' to VM '{vm['name']}'")
            else:
                logging.info(
                    f"Successfully added affinity group '{add_affinity_group}' to VM '{vm['name']}'")

        logging.info(
            f"VM '{vm['name']}' successfully migrated to '{destination_host['name']}' on cluster '{target_cluster['name']}'")

    if not migrate_with_volume:
        vm.refresh()
        target_pool = choice(target_cluster.get_storage_pools(scope='CLUSTER'))
        if not temp_migrate_volume(co=co, dry_run=dry_run, log_to_slack=log_to_slack, volume=root_disk,
                                   vm=vm, target_pool_name=target_pool['name']):
            logging.error(f"Volume '{root_disk['name']}' failed to migrate")
            return False

    if cwps_found and zwps_found:
        for volume in data_disks_to_zwps:
            target_pool = choice(target_cluster.get_storage_pools(scope='CLUSTER'))
            if not temp_migrate_volume(co=co, dry_run=dry_run, log_to_slack=log_to_slack, volume=volume,
                                       vm=vm, target_pool_name=target_pool['name']):
                logging.error(f"Volume '{volume['name']}' failed to migrate")
                return False

    if zwps_to_cwps:
        for volume in zwps_disks_to_cwps:
            target_pool = choice(target_cluster.get_storage_pools(scope='CLUSTER'))
            if not temp_migrate_volume(co=co, dry_run=dry_run, log_to_slack=log_to_slack, volume=volume,
                                       vm=vm, target_pool_name=target_pool['name']):
                logging.error(f"Volume '{volume['name']}' failed to migrate")
                return False

    return True

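# live_migrate() above leans on a temp_migrate_volume() helper that is not included in
# this excerpt. A hedged sketch of what such a helper could look like, reconstructed
# only from its call sites and from the volume.migrate()/volume.refresh() pattern used
# in live_migrate_volumes() below; the real helper in the repository may differ.
def temp_migrate_volume(co, dry_run, log_to_slack, volume, vm, target_pool_name):
    target_pool = co.get_storage_pool(name=target_pool_name)
    if not target_pool:
        return False

    if dry_run:
        logging.info(f"Would migrate volume '{volume['name']}' to storage pool '{target_pool_name}'")
        return True

    logging.info(f"Migrating volume '{volume['name']}' to storage pool '{target_pool_name}'",
                 log_to_slack=log_to_slack)

    # The source host is needed to follow the block-copy job, as in live_migrate_volumes()
    source_host = co.get_host(id=vm['hostid'])
    if not volume.migrate(target_pool, live_migrate=True, source_host=source_host, vm=vm):
        return False

    with click_spinner.spinner():
        while True:
            volume.refresh()
            if volume['state'] == 'Ready':
                break
            time.sleep(60)

    return True
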
def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_vm, log_to_slack,
                         max_iops, vm_name, zwps_to_cwps):
    target_storage_pool = co.get_storage_pool(name=target_storage_pool_name)
    if not target_storage_pool:
        return False

    # Setting a max IOPS limit is skipped when max_iops == 0
    set_max_iops = max_iops != 0

    vm = co.get_vm(name=vm_name, is_project_vm=is_project_vm)
    if not vm:
        return False

    logging.instance_name = vm['instancename']
    logging.slack_value = vm['domain']
    logging.vm_name = vm['name']
    logging.zone_name = vm['zonename']

    logging.info(
        f"Starting live migration of volumes of VM '{vm['name']}' to storage pool '{target_storage_pool['name']}' ({target_storage_pool['id']})",
        log_to_slack=log_to_slack)

    host = co.get_host(id=vm['hostid'])
    if not host:
        return False

    cluster = co.get_cluster(id=host['clusterid'])
    if not cluster:
        return False

    logging.cluster = cluster['name']

    if zwps_to_cwps:
        if not dry_run:
            logging.info(
                f"Converting any ZWPS volume of VM '{vm['name']}' to CWPS before starting the migration",
                log_to_slack=log_to_slack)
            if not cs.update_zwps_to_cwps('MCC_v1.CWPS', instance_name=vm['instancename']):
                logging.error(
                    f"Failed to apply CWPS disk offering to VM '{vm['name']}'",
                    log_to_slack=log_to_slack)
                return False
        else:
            logging.info('Would have changed the diskoffering from ZWPS to CWPS of all ZWPS volumes')

    if not dry_run:
        disk_info = host.get_disks(vm)
        for path, disk_data in disk_info.items():
            _, path, _, _, size = cs.get_volume_size(path)

            if int(size) != int(disk_data['size']):
                logging.warning(
                    f"Size for '{disk_data['path']}' in DB ({size}) is less than libvirt reports ({disk_data['size']}), updating DB")
                cs.update_volume_size(vm['instancename'], path, disk_data['size'])

    if set_max_iops:
        if not dry_run:
            if not host.set_iops_limit(vm, max_iops):
                return False
        else:
            logging.info(f"Would have set an IOPS limit to '{max_iops}'")
    else:
        logging.info('Not setting an IOPS limit as it is disabled')

    if not dry_run:
        if not host.merge_backing_files(vm):
            if set_max_iops:
                host.set_iops_limit(vm, 0)
            return False
    else:
        logging.info('Would have merged all backing files if any exist')

    for volume in vm.get_volumes():
        if volume['storageid'] == target_storage_pool['id']:
            logging.warning(
                f"Skipping volume '{volume['name']}' as it's already on the specified storage pool",
                log_to_slack=log_to_slack)
            continue

        source_storage_pool = co.get_storage_pool(id=volume['storageid'])
        if not source_storage_pool:
            continue

        if source_storage_pool['scope'] == 'Host' or (
                source_storage_pool['scope'] == 'ZONE' and not zwps_to_cwps):
            logging.warning(
                f"Skipping volume '{volume['name']}' as its scope is '{source_storage_pool['scope']}'",
                log_to_slack=log_to_slack)
            continue

        if not co.clean_old_disk_file(
                host=host, dry_run=dry_run, volume=volume,
                target_pool_name=target_storage_pool['name']):
            logging.error(f"Cleaning volume '{volume['name']}' failed on zwps")
            return False

        if dry_run:
            logging.info(
                f"Would migrate volume '{volume['name']}' to storage pool '{target_storage_pool['name']}' ({target_storage_pool['id']})")
            continue

        logging.info(
            f"Starting migration of volume '{volume['name']}' from storage pool '{source_storage_pool['name']}' to storage pool '{target_storage_pool['name']}' ({target_storage_pool['id']})",
            log_to_slack=log_to_slack)

        # Get the source host to read the blkjobinfo
        source_host = co.get_host(id=vm['hostid'])

        if not volume.migrate(target_storage_pool, live_migrate=True, source_host=source_host, vm=vm):
            continue

        with click_spinner.spinner():
            while True:
                volume.refresh()

                if volume['state'] == 'Ready':
                    break

                logging.warning(
                    f"Volume '{volume['name']}' is in '{volume['state']}' state instead of 'Ready', sleeping...")
                time.sleep(60)

        logging.info(
            f"Finished migration of volume '{volume['name']}' from storage pool '{source_storage_pool['name']}' to storage pool '{target_storage_pool['name']}' ({target_storage_pool['id']})",
            log_to_slack=log_to_slack)

    logging.info(
        f"Finished live migration of volumes of VM '{vm['name']}' to storage pool '{target_storage_pool['name']}' ({target_storage_pool['id']})",
        log_to_slack=log_to_slack)

    if not dry_run:
        host.set_iops_limit(vm, 0)

    return True

def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, only_project,
         source_cluster_name, destination_cluster_name):
    """Migrate offline volumes from SOURCE_CLUSTER to DESTINATION_CLUSTER"""

    click_log.basic_config()

    if source_cluster_name == destination_cluster_name:
        logging.error('Destination cluster cannot be the source cluster!')
        sys.exit(1)

    if dry_run:
        logging.warning('Running in dry-run mode, will only show changes')

    co = CosmicOps(profile=profile, dry_run=dry_run)
    cs = CosmicSQL(server=profile, dry_run=dry_run)

    source_cluster = co.get_cluster(name=source_cluster_name)
    source_storage_pools = co.get_all_storage_pools(name=source_cluster_name)
    if not source_cluster and not source_storage_pools:
        logging.error(f"Source cluster not found:'{source_cluster_name}'!")
        sys.exit(1)

    destination_cluster = co.get_cluster(name=destination_cluster_name)
    if not destination_cluster:
        logging.error(f"Destination cluster not found:'{destination_cluster_name}'!")
        sys.exit(1)

    if source_cluster:
        try:
            source_storage_pools = source_cluster.get_storage_pools(scope='CLUSTER')
        except IndexError:
            logging.error(f"No storage pools found for cluster '{source_cluster['name']}'")
            sys.exit(1)

    logging.info('Source storage pools found:')
    for source_storage_pool in source_storage_pools:
        logging.info(f" - '{source_storage_pool['name']}'")

    try:
        destination_storage_pools = destination_cluster.get_storage_pools(scope='CLUSTER')
    except IndexError:
        logging.error(f"No storage pools found for cluster '{destination_cluster['name']}'")
        sys.exit(1)

    logging.info('Destination storage pools found:')
    for destination_storage_pool in destination_storage_pools:
        logging.info(f" - '{destination_storage_pool['name']}'")

    if ignore_volumes:
        ignore_volumes = ignore_volumes.replace(' ', '').split(',')
        logging.info(f"Ignoring volumes: {str(ignore_volumes)}")

    if skip_disk_offerings:
        skip_disk_offerings = skip_disk_offerings.replace(' ', '').split(',')
        logging.info(f"Skipping disk offerings: {str(skip_disk_offerings)}")

    for source_storage_pool in source_storage_pools:
        destination_storage_pool = choice(destination_storage_pools)
        volumes = source_storage_pool.get_volumes(only_project)

        for volume in volumes:
            if ignore_volumes and volume['id'] in ignore_volumes:
                continue

            if skip_disk_offerings and volume.get('diskofferingname') in skip_disk_offerings:
                logging.warning(
                    f"Volume '{volume['name']}' has offering '{volume['diskofferingname']}', skipping...")
                continue

            if 'storage' not in volume:
                logging.warning(
                    f"No storage attribute found for volume '{volume['name']}' ({volume['id']}), skipping...")
                continue

            if volume['storage'] == destination_storage_pool['name']:
                logging.warning(
                    f"Volume '{volume['name']}' ({volume['id']}) already on cluster '{destination_cluster['name']}', skipping...")
                continue

            if volume['state'] != 'Ready':
                logging.warning(
                    f"Volume '{volume['name']}' ({volume['id']}) is in state '{volume['state']}', skipping...")
                continue

            if 'vmstate' in volume and volume['vmstate'] != 'Stopped':
                logging.warning(
                    f"Volume '{volume['name']}' ({volume['id']}) is attached to {volume['vmstate']} VM '{volume['vmname']}', skipping...")
                continue

            if zwps_to_cwps:
                if not dry_run:
                    logging.info(
                        f"Converting ZWPS volume '{volume['name']}' to CWPS before starting the migration")
                    if not cs.update_zwps_to_cwps('MCC_v1.CWPS', volume_id=volume['id']):
                        logging.error(
                            f"Failed to apply CWPS disk offering to volume '{volume['name']}'")
                        return False
                else:
                    logging.info(
                        f"Would have changed the diskoffering for volume '{volume['name']}' to CWPS before starting the migration")

            if source_cluster:
                logging.info(
                    f"Volume '{volume['name']}' will be migrated from cluster '{source_cluster['name']}' to '{destination_cluster['name']}'")
            else:
                logging.info(
                    f"Volume '{volume['name']}' will be migrated from storage pool '{source_storage_pool['name']}' to '{destination_cluster['name']}'")

            if not volume.migrate(destination_storage_pool):
                continue

def main(profile, is_project_router, only_when_required, cleanup, dry_run, router):
    """Router restart and upgrade script"""

    click_log.basic_config()

    log_to_slack = True

    if dry_run:
        log_to_slack = False
        logging.warning('Running in dry-run mode, will only show changes')

    co = CosmicOps(profile=profile, dry_run=dry_run, log_to_slack=log_to_slack)

    router = co.get_router(name=router, is_project_router=is_project_router)
    if not router:
        sys.exit(1)

    logging.instance_name = router['name']
    logging.slack_title = 'Domain'
    logging.slack_value = router['domain']

    host = co.get_host(id=router['hostid'])
    if not host:
        sys.exit(1)

    cluster = co.get_cluster(id=host['clusterid'])
    if not cluster:
        sys.exit(1)

    logging.cluster = cluster['name']

    if only_when_required and not router['requiresupgrade']:
        logging.info(
            f"Router '{router['name']}' does not need to be upgraded. Will not reboot because --only-when-required was specified.")
        sys.exit(0)

    if cleanup:
        if not router['vpcid']:
            logging.error(f"Cleanup specified but no VPC ID found for router '{router['name']}'")
            sys.exit(1)

        logging.task = 'Restart VPC with clean up'

        vpc = co.get_vpc(id=router['vpcid'])
        if not vpc:
            sys.exit(1)

        if not vpc.restart():
            sys.exit(1)

        logging.info(
            f"Successfully restarted VPC '{vpc['name']}' with cleanup for router '{router['name']}'")
    else:
        logging.task = 'Reboot virtual router'

        if not router.reboot():
            sys.exit(1)

        logging.info(f"Successfully rebooted router '{router['name']}'", log_to_slack)

def main(dry_run, zwps_cluster, destination_cluster, virtual_machines, force_end_hour):
    """Empty ZWPS by migrating VMs and/or their volumes to the destination cluster."""

    click_log.basic_config()

    if force_end_hour:
        try:
            force_end_hour = int(force_end_hour)
        except ValueError as e:
            logging.error(f"Specified time:'{force_end_hour}' is not a valid integer due to: '{e}'")
            sys.exit(1)
        if force_end_hour >= 24:
            logging.error(f"Specified time:'{force_end_hour}' should be < 24")
            sys.exit(1)

    profile = 'nl2'

    log_to_slack = True
    logging.task = 'Live Migrate VM Volumes'
    logging.slack_title = 'Domain'

    if dry_run:
        log_to_slack = False
        logging.warning('Running in dry-run mode, will only show changes')

    co = CosmicOps(profile=profile, dry_run=dry_run, log_to_slack=log_to_slack)

    if not dry_run:
        cs = CosmicSQL(server=profile, dry_run=dry_run)
    else:
        cs = None

    zwps_storage_pools = []
    for storage_pool in co.get_all_storage_pools():
        if zwps_cluster.upper() in storage_pool['name']:
            zwps_storage_pools.append(storage_pool)

    logging.info('ZWPS storage pools found:')
    for zwps_storage_pool in zwps_storage_pools:
        logging.info(f" - '{zwps_storage_pool['name']}'")

    target_cluster = co.get_cluster(name=destination_cluster)
    if not target_cluster:
        logging.error(f"Destination cluster not found:'{destination_cluster}'!")
        sys.exit(1)

    try:
        destination_storage_pools = target_cluster.get_storage_pools(scope='CLUSTER')
    except IndexError:
        logging.error(f"No storage pools found for cluster '{target_cluster['name']}'")
        sys.exit(1)

    logging.info('Destination storage pools found:')
    for target_storage_pool in destination_storage_pools:
        logging.info(f" - '{target_storage_pool['name']}'")

    target_storage_pool = random.choice(destination_storage_pools)

    volumes = []
    for zwps_storage_pool in zwps_storage_pools:
        vols = co.get_all_volumes(list_all=True, storageid=zwps_storage_pool['id'])
        if vols:
            volumes += vols

    vm_ids = []
    logging.info('Volumes found:')
    for volume in volumes:
        for virtual_machine in virtual_machines:
            if re.search(virtual_machine, volume['vmname'], re.IGNORECASE):
                logging.info(f" - '{volume['name']}' on VM '{volume['vmname']}'")
                if volume['virtualmachineid'] not in vm_ids:
                    vm_ids.append(volume['virtualmachineid'])

    vms = []
    for vm_id in vm_ids:
        vm = co.get_vm(id=vm_id)

        # Skip VMs that are pinned to dedicated hypervisors
        skip = False
        if vm['affinitygroup']:
            for affinitygroup in vm['affinitygroup']:
                if 'DedicatedGrp' in affinitygroup['name']:
                    logging.warning(
                        f"Skipping VM '{vm['name']}' because of 'DedicatedGrp' affinity group")
                    skip = True
                    break
        if skip:
            continue

        vms.append(vm)

    logging.info('Virtualmachines found:')
    for vm in vms:
        logging.info(f" - '{vm['name']}'")

    logging.info(
        f"Starting live migration of volumes and/or virtualmachines from the ZWPS storage pools to storage pool '{target_cluster['name']}'")

    for vm in vms:
        # Can we start a new migration?
        if force_end_hour:
            now = datetime.datetime.now(pytz.timezone('CET'))
            if now.hour >= force_end_hour:
                logging.info(
                    f"Stopping migration batch. We are not starting new migrations after '{force_end_hour}':00",
                    log_to_slack=log_to_slack)
                sys.exit(0)

        source_host = co.get_host(id=vm['hostid'])
        source_cluster = co.get_cluster(zone='nl2', id=source_host['clusterid'])

        if source_cluster['name'] == target_cluster['name']:
            # VM is already on the destination cluster, so we only need to migrate
            # the volumes to this storage pool
            logging.info(
                f"Starting live migration of volumes of VM '{vm['name']}' to storage pool '{target_storage_pool['name']}' ({target_storage_pool['id']})",
                log_to_slack=log_to_slack)
            live_migrate_volumes(target_storage_pool['name'], co, cs, dry_run, False, log_to_slack,
                                 0, vm['name'], True)
        else:
            # VM needs to be live migrated to the destination cluster, including its volumes
            live_migrate(co=co, cs=cs, cluster=target_cluster['name'], vm_name=vm['name'],
                         destination_dc=None, add_affinity_group=None, is_project_vm=None,
                         zwps_to_cwps=True, log_to_slack=log_to_slack, dry_run=dry_run)

def main(profile, ignore_hosts, only_hosts, skip_os_version, reboot_action, pre_empty_script,
         post_empty_script, post_reboot_script, dry_run, cluster):
    """Perform rolling reboot of hosts in CLUSTER"""

    click_log.basic_config()

    log_to_slack = True
    logging.task = 'Rolling Reboot'
    logging.slack_title = 'Hypervisor'
    logging.instance_name = 'N/A'
    logging.vm_name = 'N/A'
    logging.cluster = cluster

    if dry_run:
        log_to_slack = False
        logging.warning('Running in dry-run mode, will only show changes')

    co = CosmicOps(profile=profile, dry_run=dry_run, log_to_slack=log_to_slack)

    cluster = co.get_cluster(name=cluster)
    if not cluster:
        sys.exit(1)

    hosts = cluster.get_all_hosts()
    logging.debug(f"Found hosts: {hosts}")

    if ignore_hosts:
        ignore_hosts = ignore_hosts.replace(' ', '').split(',')
        logging.info(f"Ignoring hosts: {str(ignore_hosts)}")
        hosts = [h for h in hosts if h['name'] not in ignore_hosts]
    elif only_hosts:
        only_hosts = only_hosts.replace(' ', '').split(',')
        logging.info(f"Only processing hosts: {str(only_hosts)}")
        hosts = [h for h in hosts if h['name'] in only_hosts]

    if skip_os_version:
        logging.info(f"Skipping hosts with OS: {skip_os_version}")
        hosts = [h for h in hosts if skip_os_version not in h['hypervisorversion']]

    hosts.sort(key=itemgetter('name'))

    target_host = None
    for host in hosts:
        logging.slack_value = host['name']
        logging.zone_name = host['zonename']

        logging.info(f"Processing host {host['name']}", log_to_slack)

        for script in filter(None, (pre_empty_script, post_empty_script, post_reboot_script)):
            path = Path(script)
            host.copy_file(str(path), f'/tmp/{path.name}', mode=0o755)

        if pre_empty_script:
            host.execute(f'/tmp/{Path(pre_empty_script).name}', sudo=True, hide_stdout=False, pty=True)

        if host['resourcestate'] != 'Disabled':
            if not host.disable():
                sys.exit(1)

        if host['state'] != 'Up' and not dry_run:
            logging.error(
                f"Host '{host['name']}' is not up (state: '{host['state']}'), aborting",
                log_to_slack)
            sys.exit(1)

        running_vms = len(host.get_all_vms())
        logging.info(
            f"Found {running_vms} VMs running on host '{host['name']}'. Will now start migrating them to other hosts in the same cluster",
            log_to_slack)

        while True:
            (_, _, failed) = host.empty(target=target_host)
            if failed == 0:
                break

            if target_host:
                logging.warning(
                    f"Failed to empty host '{host['name']}' with target '{target_host['name']}', resetting target host and retrying...",
                    log_to_slack)
                target_host = None
            else:
                logging.warning(f"Failed to empty host '{host['name']}', retrying...", log_to_slack)

            time.sleep(5)

        logging.info(f"Host {host['name']} is empty", log_to_slack)

        if post_empty_script:
            host.execute(f'/tmp/{Path(post_empty_script).name}', sudo=True, hide_stdout=False, pty=True)

        if not host.reboot(reboot_action):
            sys.exit(1)

        if reboot_action != RebootAction.SKIP:
            host.wait_until_offline()
            host.wait_until_online()

        if post_reboot_script:
            host.execute(f'/tmp/{Path(post_reboot_script).name}', sudo=True, hide_stdout=False, pty=True)

        if not host.enable():
            sys.exit(1)

        host.wait_for_agent()

        host.restart_vms_with_shutdown_policy()

        target_host = host

def empty(self, target=None):
    total = success = failed = 0

    all_vms = self.get_all_vms() + self.get_all_project_vms() + self.get_all_routers() + \
        self.get_all_project_routers() + self.get_all_system_vms()
    if not all_vms:
        logging.warning(f"No VMs found on host '{self['name']}'")
        return total, success, failed

    total = len(all_vms)

    target_message = f" to target '{target['name']}'" if target else ''
    if self.dry_run:
        logging.info(f"Dry run of VM migration away from host '{self['name']}'" + target_message)
    else:
        logging.info(f"Migrating VMs away from host '{self['name']}'" + target_message)

    for vm in all_vms:
        if vm.get('maintenancepolicy') == 'ShutdownAndStart':
            if not vm.stop():
                failed += 1
                continue

            success += 1

            # If the host is disabled, try to restart the VM. Will fail if the host is on NVMe.
            if self['resourcestate'] == 'Disabled':
                if vm.start():
                    continue

            self.vms_with_shutdown_policy.append(vm)
            continue

        vm_on_dedicated_hv = False
        dedicated_affinity_id = None
        for affinity_group in vm.get_affinity_groups():
            if affinity_group['type'] == 'ExplicitDedication':
                vm_on_dedicated_hv = True
                dedicated_affinity_id = affinity_group['id']

        if target:
            available_hosts = [target]
        else:
            try:
                available_hosts = self._ops.cs.findHostsForMigration(
                    virtualmachineid=vm['id']).get('host', [])
            except CloudStackApiException as e:
                logging.error(f"Encountered API exception while finding suitable host for migration: {e}")
                failed += 1
                continue

        available_hosts.sort(key=itemgetter('memoryallocated'))

        migration_host = None

        for available_host in available_hosts:
            if not target:
                # Skip hosts that require storage migration
                if available_host['requiresStorageMotion']:
                    logging.debug(
                        f"Skipping '{available_host['name']}' because migrating VM '{vm['name']}' requires a storage migration")
                    continue

                # Ensure host is suitable for migration
                if not available_host['suitableformigration']:
                    logging.debug(f"Skipping '{available_host['name']}' because it's not suitable for migration")
                    continue

                # Only hosts in the same cluster
                if available_host['clusterid'] != self['clusterid']:
                    logging.debug(f"Skipping '{available_host['name']}' because it's part of a different cluster")
                    continue

            if vm_on_dedicated_hv:
                # Ensure the dedication group matches
                if available_host.get('affinitygroupid') != dedicated_affinity_id:
                    logging.info(
                        f"Skipping '{available_host['name']}' because host does not match the dedication group of VM '{vm['name']}'")
                    continue
            else:
                # If the user VM isn't dedicated, skip dedicated hosts
                if vm.is_user_vm() and 'affinitygroupid' in available_host:
                    logging.info(
                        f"Skipping '{available_host['name']}' because host is dedicated and VM '{vm['name']}' is not")
                    continue

            logging.debug(f"Selected '{available_host['name']}' for VM '{vm['name']}'")
            migration_host = available_host
            break

        if not migration_host:
            logging.error(
                f"Failed to find host with capacity to migrate VM '{vm['name']}'. Please migrate manually to another cluster.")
            failed += 1
            continue

        if not vm.migrate(migration_host):
            failed += 1
        else:
            success += 1

    return total, success, failed

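# Typical calling pattern for empty(), condensed from the rolling-reboot flow above:
# retry until no migrations fail, dropping a pinned target host after a failed pass.
# Shown only to illustrate the (total, success, failed) return contract; the helper
# name and signature are assumptions.
def drain_host(host, target_host=None):
    while True:
        (_, _, failed) = host.empty(target=target_host)
        if failed == 0:
            return True

        if target_host:
            logging.warning(f"Failed to empty host '{host['name']}' with target "
                            f"'{target_host['name']}', resetting target host and retrying...")
            target_host = None
        else:
            logging.warning(f"Failed to empty host '{host['name']}', retrying...")

        time.sleep(5)
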
def main(profile, domain_name, cluster_name, pod_name, zone_name, keyword_filter, only_routers,
         only_routers_to_be_upgraded, no_routers, router_nic_count, nic_count_is_minimum,
         nic_count_is_maximum, router_max_version, router_min_version, project_name, only_project,
         ignore_domains, calling_credentials, only_summary, no_summary, log_file):
    """List VMs"""

    click_log.basic_config()

    if log_file:
        logger = logging.getLogger()
        logger.addHandler(log_module.FileHandler(log_file))

    if project_name and domain_name:
        logging.error("The project and domain options can't be used together")
        sys.exit(1)

    co = CosmicOps(profile=profile, dry_run=False)

    if ignore_domains:
        ignore_domains = ignore_domains.replace(' ', '').split(',')
        logging.info(f"Ignoring domains: {str(ignore_domains)}")

    if calling_credentials:
        table_headers = [
            'VM', 'Storage', 'Template', 'Memory', 'Cores', 'Instance', 'Host', 'Domain',
            'Account', 'Created', 'LastRebootVersion'
        ]
        table_data = []

        if only_project:
            vms = co.get_all_project_vms(list_all=False)
        else:
            vms = co.get_all_vms(list_all=False)

        with click_spinner.spinner():
            for vm in vms:
                if vm['domain'] in ignore_domains:
                    continue

                storage_size = sum([volume['size'] for volume in vm.get_volumes()])

                vm_project_name = vm.get('project', None)
                vm_account = f"Proj: {vm_project_name}" if vm_project_name else vm['account']

                table_data.append([
                    vm['name'],
                    humanfriendly.format_size(storage_size, binary=True),
                    vm['templatedisplaytext'],
                    humanfriendly.format_size(vm['memory'] * 1024 ** 2, binary=True),
                    vm['cpunumber'],
                    vm['instancename'],
                    vm['hostname'],
                    vm['domain'],
                    vm_account,
                    vm['created'],
                    vm['laststartversion']
                ])

        logging.info(tabulate(table_data, headers=table_headers, tablefmt='pretty'))
        sys.exit(0)

    if domain_name:
        domain = co.get_domain(name=domain_name)
        if domain is None or domain == []:
            logging.error(f"The domain '{str(domain_name)}' could not be found!")
            sys.exit(1)
    else:
        domain = None

    if project_name:
        project = co.get_project(name=project_name)
        if project is None or project == []:
            logging.error(f"The project '{str(project_name)}' could not be found!")
            sys.exit(1)
    else:
        project = None

    if pod_name:
        pod = co.get_pod(name=pod_name)
        if pod is None or pod == []:
            logging.error(f"The pod '{str(pod_name)}' could not be found!")
            sys.exit(1)
    else:
        pod = None

    if zone_name:
        zone = co.get_zone(name=zone_name)
        if zone is None or zone == []:
            logging.error(f"The zone '{str(zone_name)}' could not be found!")
            sys.exit(1)
    else:
        zone = None

    if cluster_name:
        clusters = [co.get_cluster(name=cluster_name)]
        if clusters[0] is None:
            logging.error(f"The cluster '{str(cluster_name)}' could not be found!")
            sys.exit(1)
    elif pod:
        clusters = co.get_all_clusters(pod=pod)
    elif zone:
        clusters = co.get_all_clusters(zone=zone)
    else:
        clusters = co.get_all_clusters()

    total_host_counter = 0
    total_vm_counter = 0
    total_host_memory = 0
    total_vm_memory = 0
    total_storage = 0
    total_cores = 0

    for cluster in clusters:
        hosts = cluster.get_all_hosts()
        if not hosts:
            logging.warning(f"No hosts found on cluster '{cluster['name']}'")
            continue

        cluster_host_counter = 0
        cluster_vm_counter = 0
        cluster_host_memory = 0
        cluster_vm_memory = 0
        cluster_storage = 0
        cluster_cores = 0

        cluster_table_headers = [
            'VM', 'Storage', 'Template', 'Router nic count', 'Router version', 'Memory', 'Cores',
            'Instance', 'Host', 'Domain', 'Account', 'Created', 'LastRebootVersion'
        ]
        cluster_table_data = []

        for host in hosts:
            cluster_host_counter += 1
            cluster_host_memory += host['memorytotal']

            if not only_routers:
                if project or only_project:
                    vms = host.get_all_project_vms(project=project)
                else:
                    vms = host.get_all_vms(domain=domain, keyword_filter=keyword_filter)

                for vm in vms:
                    if vm['domain'] in ignore_domains:
                        continue

                    cluster_vm_counter += 1
                    storage_size = sum([volume['size'] for volume in vm.get_volumes()])
                    cluster_storage += storage_size
                    cluster_vm_memory += vm['memory']
                    cluster_cores += vm['cpunumber']

                    vm_project_name = vm.get('project', None)
                    vm_account = f"Proj: {vm['project']}" if vm_project_name else vm['account']

                    cluster_table_data.append([
                        vm['name'],
                        humanfriendly.format_size(storage_size, binary=True),
                        vm['templatedisplaytext'],
                        '-',
                        '-',
                        humanfriendly.format_size(vm['memory'] * 1024 ** 2, binary=True),
                        vm['cpunumber'],
                        vm['instancename'],
                        vm['hostname'],
                        vm['domain'],
                        vm_account,
                        vm['created'],
                        vm['laststartversion']
                    ])

            if no_routers:
                continue

            if project or only_project:
                routers = host.get_all_project_routers(project=project)
            else:
                routers = host.get_all_routers(domain=domain)

            for router in routers:
                if router['domain'] in ignore_domains:
                    continue

                if router_min_version and LooseVersion(router['version']) < LooseVersion(router_min_version):
                    continue

                if router_max_version and LooseVersion(router['version']) > LooseVersion(router_max_version):
                    continue

                if router_nic_count and nic_count_is_minimum:
                    if router_nic_count > len(router['nic']):
                        continue
                elif router_nic_count and nic_count_is_maximum:
                    if router_nic_count < len(router['nic']):
                        continue
                elif router_nic_count:
                    if router_nic_count != len(router['nic']):
                        continue

                if only_routers_to_be_upgraded and not router['requiresupgrade']:
                    continue

                cluster_vm_counter += 1

                service_offering = co.get_service_offering(id=router['serviceofferingid'], system=True)
                if service_offering:
                    router['memory'] = service_offering['memory']
                    router['cpunumber'] = service_offering['cpunumber']

                    cluster_vm_memory += router['memory']
                    cluster_cores += router['cpunumber']
                else:
                    router['memory'] = 'Unknown'
                    router['cpunumber'] = 'Unknown'

                if router['isredundantrouter']:
                    redundant_state = router['redundantstate']
                elif router['vpcid']:
                    redundant_state = 'VPC'
                else:
                    redundant_state = 'SINGLE'

                if router['vpcid']:
                    network = co.get_vpc(id=router['vpcid'])
                else:
                    network = co.get_network(id=router['guestnetworkid'])

                if network:
                    display_name = network['name']
                else:
                    display_name = router['name']

                display_name = f"{display_name} ({redundant_state.lower()})"

                if router['requiresupgrade']:
                    display_name = f"{display_name} [ReqUpdate!]"

                router_project_name = router.get('project', None)
                router_account = f"Proj: {router['project']}" if router_project_name else router['account']

                cluster_table_data.append([
                    display_name,
                    '-',
                    '-',
                    len(router['nic']),
                    router['version'],
                    humanfriendly.format_size(router['memory'] * 1024 ** 2, binary=True)
                    if router['memory'] != 'Unknown' else router['memory'],
                    router['cpunumber'],
                    router['name'],
                    router['hostname'],
                    router['domain'],
                    router_account,
                    router['created'],
                    router['laststartversion']
                ])

        total_host_counter += cluster_host_counter
        total_host_memory += cluster_host_memory
        total_vm_memory += cluster_vm_memory
        total_vm_counter += cluster_vm_counter
        total_storage += cluster_storage
        total_cores += cluster_cores

        if not only_summary:  # pragma: no cover
            logging.info(tabulate(cluster_table_data, headers=cluster_table_headers, tablefmt='pretty'))

        if not no_summary:  # pragma: no cover
            logging.info(f"\nSummary for '{cluster['name']}':")
            logging.info(f"Number of VMs: {cluster_vm_counter}")
            logging.info(f"Number of hosts: {cluster_host_counter}")
            logging.info(
                f"Allocated memory: {humanfriendly.format_size(cluster_vm_memory * 1024 ** 2, binary=True)} / {humanfriendly.format_size(cluster_host_memory, binary=True)}")
            logging.info(f"Allocated cores: {cluster_cores}")
            logging.info(f"Allocated storage: {humanfriendly.format_size(cluster_storage, binary=True)}")

    if not no_summary:  # pragma: no cover
        logging.info('\n================== Grand Totals ===============')
        logging.info(f"Total number of VMs: {total_vm_counter}")
        logging.info(f"Total number of hosts: {total_host_counter}")
        logging.info(
            f"Total allocated memory: {humanfriendly.format_size(total_vm_memory * 1024 ** 2, binary=True)} / {humanfriendly.format_size(total_host_memory, binary=True)}")
        logging.info(f"Total allocated cores: {total_cores}")
        logging.info(f"Total allocated storage: {humanfriendly.format_size(total_storage, binary=True)}")