def _cleanup_vgpu_trait(self):
    """Delete the vGPU trait through the openstack CLI.

    The command is run on the first unit returned by
    ``_get_vgpu_unit_names`` with ``fatal=False``.
    """
    logging.info('Cleaning up trait {}...'.format(self.trait_name))
    auth_args = openstack_utils.get_cli_auth_args(self.keystone_client)
    delete_cmd = (
        'openstack {} --os-placement-api-version 1.6 trait delete {}'
    ).format(auth_args, self.trait_name)
    target_unit = self._get_vgpu_unit_names()[0]
    juju_utils.remote_run(
        target_unit, remote_cmd=delete_cmd, timeout=180, fatal=False)
def main(argv):
    """Round-trip a small object through a Ceph ``rbd`` pool.

    The object is written from the first requested unit and read back
    from the last one; the process exits with status 1 when the content
    read back differs from what was written.

    :param argv: not used by this function; ``argparse`` reads the
                 process command line itself
    :type argv: list
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("application", default="ceph-mon", nargs="*")
    parser.add_argument("units", default=[0, 1], nargs="*")
    options = parser.parse_args()
    application = cli_utils.parse_arg(options, 'application',
                                      multiargs=False)
    units = cli_utils.parse_arg(options, 'units', multiargs=True)

    first_unit = '{}/{}'.format(application, units[0])
    last_unit = '{}/{}'.format(application, units[-1])

    juju_utils.remote_run(last_unit,
                          remote_cmd='ceph osd pool create rbd 128')

    # Write the test object from the first unit
    juju_utils.remote_run(first_unit,
                          remote_cmd='echo 123456789 > /tmp/input.txt')
    juju_utils.remote_run(
        first_unit,
        remote_cmd='rados put -p rbd test_input /tmp/input.txt')

    # Read it back from the last unit
    output = juju_utils.remote_run(
        last_unit,
        remote_cmd='rados get -p rbd test_input /dev/stdout')

    # Cleanup
    juju_utils.remote_run(last_unit,
                          remote_cmd='rados rm -p rbd test_input')

    if output.strip() != "123456789":
        sys.exit(1)
def _assign_vgpu_trait_to_flavor(self, flavor_name):
    """Require one VGPU resource and the vGPU trait on a flavor.

    The openstack CLI is run on the first unit returned by
    ``_get_vgpu_unit_names`` with ``fatal=True``.

    :param flavor_name: name of the flavor to update
    :type flavor_name: str
    """
    logging.info('Assigning trait {} to flavor {} ...'.format(
        self.trait_name, flavor_name))
    auth_args = openstack_utils.get_cli_auth_args(self.keystone_client)
    flavor_cmd = (
        'openstack {} flavor set {} --property resources:VGPU=1 '
        '--property trait:{}=required'
    ).format(auth_args, flavor_name, self.trait_name)
    target_unit = self._get_vgpu_unit_names()[0]
    juju_utils.remote_run(
        target_unit, remote_cmd=flavor_cmd, timeout=180, fatal=True)
def _create_vgpu_trait(self, resource_provider_id):
    """Create the vGPU trait and set it on a resource provider.

    Both openstack CLI commands are run on the first unit returned by
    ``_get_vgpu_unit_names`` with ``fatal=True``.

    :param resource_provider_id: resource provider to set the trait on
    :type resource_provider_id: str
    """
    logging.info('Creating trait {}...'.format(self.trait_name))

    # Step 1: create the trait itself.
    create_cmd = (
        'openstack {} --os-placement-api-version 1.6 trait create {}'
    ).format(
        openstack_utils.get_cli_auth_args(self.keystone_client),
        self.trait_name)
    first_unit_name = self._get_vgpu_unit_names()[0]
    juju_utils.remote_run(
        first_unit_name, remote_cmd=create_cmd, timeout=180, fatal=True)

    # Step 2: set the new trait on the resource provider.
    set_cmd = (
        'openstack {} --os-placement-api-version 1.6 resource provider '
        'trait set --trait {} {}'
    ).format(
        openstack_utils.get_cli_auth_args(self.keystone_client),
        self.trait_name,
        resource_provider_id)
    juju_utils.remote_run(
        first_unit_name, remote_cmd=set_cmd, timeout=180, fatal=True)
def get_juju_leader(service):
    """Return the first unit of ``service`` that reports being leader.

    :param service: value passed to ``get_juju_units`` to list the units
    :returns: the first unit whose ``is-leader`` output is ``True``, or
              None when no unit reports leadership
    """
    # XXX Juju status should report the leader but doesn't at the moment.
    # So, until it does run leader on the units
    return next(
        (candidate for candidate in get_juju_units(service)
         if juju_utils.remote_run(candidate, 'is-leader').strip() == 'True'),
        None)
def create_workload(self, instance_id):
    """Create a workload and wait until it reports ``available``.

    The status check is retried with exponential backoff (capped at 30
    seconds between attempts) for up to 180 seconds; the last failure is
    re-raised when the deadline passes.

    :param instance_id: instance ID to create workload from
    :type instance_id: str
    :returns: workload ID
    :rtype: str
    """
    create_cmd = self.WORKLOAD_CREATE_CMD.format(
        auth_args=self.auth_args, instance_id=instance_id)
    raw_output = juju_utils.remote_run(
        self.trilio_wlm_unit,
        remote_cmd=create_cmd,
        timeout=180,
        fatal=True,
    )
    workload_id = raw_output.strip()

    wait_until_available = tenacity.Retrying(
        wait=tenacity.wait_exponential(multiplier=1, max=30),
        stop=tenacity.stop_after_delay(180),
        reraise=True,
    )
    wait_until_available(
        _resource_reaches_status,
        self.trilio_wlm_unit,
        self.auth_args,
        self.WORKLOAD_STATUS_CMD,
        self.WORKLOAD_FULL_STATUS_CMD,
        workload_id,
        "available",
    )

    return workload_id
def _assert_cidr_in_peer_routing_table(peer_unit, cidr):
    """Assert that a CIDR appears in the BGP routes of a peer unit.

    :param peer_unit: unit on which ``vtysh`` is run
    :param cidr: text expected to occur in the route listing
    :type cidr: str
    :raises: AssertionError if ``cidr`` is not in the command output
    """
    logging.debug("Checking for {} on BGP peer {}".format(cidr, peer_unit))
    # Run show ip route bgp on BGP peer
    bgp_routes = juju_utils.remote_run(
        peer_unit,
        remote_cmd='vtysh -c "show ip route bgp"',
    )
    logging.debug(bgp_routes)
    assert cidr in bgp_routes, (
        "CIDR, {}, not found in BGP peer's routing table".format(cidr))
def test_remote_run(self):
    """Exercise remote_run for success, non-fatal and fatal failure."""
    _cmd = "do the thing"

    # Success: stdout of the run is returned.
    success_result = juju_utils.remote_run(self.unit1, _cmd)
    self.assertEqual(success_result, self.run_output["Stdout"])
    self.model.run_on_unit.assert_called_once_with(
        self.unit1, _cmd, model_name=None, timeout=None)

    # Non-fatal failure: stderr of the run is returned.
    self.model.run_on_unit.return_value = self.error_run_output
    non_fatal_result = juju_utils.remote_run(self.unit1, _cmd, fatal=False)
    self.assertEqual(non_fatal_result, self.error_run_output["Stderr"])

    # Fatal failure: an exception is raised.
    with self.assertRaises(Exception):
        juju_utils.remote_run(self.unit1, _cmd, fatal=True)
def get_ubuntu_version(service):
    """Return the ``lsb_release -sc`` value shared by all units.

    :param service: value passed to ``get_juju_units`` to list the units
    :returns: first whitespace-separated token of the command output
    :rtype: str
    :raises: Exception if the units do not all report the same value
             (this includes the case where there are no units)
    """
    cmd = 'lsb_release -sc'
    versions = [
        juju_utils.remote_run(unit, cmd).split()[0]
        for unit in get_juju_units(service)
    ]
    if len(set(versions)) != 1:
        raise Exception('Unexpected output from ubuntu version check')
    return versions[0]
def rabbit_unit_status(unit):
    """Return the numeric suffixes of the running RabbitMQ nodes.

    The ``running_nodes`` section of ``rabbitmqctl -q cluster_status`` is
    extracted with a regular expression and evaluated as a Python literal;
    the number after the last ``-`` of each entry is collected.

    :param unit: unit on which ``rabbitmqctl`` is run
    :returns: one integer per running node, in reported order
    :rtype: list
    """
    raw_status = juju_utils.remote_run(
        unit, remote_cmd='rabbitmqctl -q cluster_status')
    # Join the output into one line so the pattern can span line breaks.
    flattened = raw_status.replace('\n', '')
    running = re.search(r'running_nodes,(.*)}, {partitions', flattened)
    node_names = ast.literal_eval(running.group(1))
    return [int(node.split('-')[-1]) for node in node_names]
def _get_num_vgpu_used(self, resource_provider_id):
    """Return the ``used`` inventory value of a resource provider.

    The openstack CLI is run on the first unit returned by
    ``_get_vgpu_unit_names`` with ``fatal=True``.

    :param resource_provider_id: resource provider to query
    :type resource_provider_id: str
    :returns: the command output converted to an integer
    :rtype: int
    """
    logging.info('Querying resource provider inventory...')
    auth_args = openstack_utils.get_cli_auth_args(self.keystone_client)
    inventory_cmd = (
        'openstack {} resource provider inventory list {} '
        '-f value -c used'
    ).format(auth_args, resource_provider_id)
    target_unit = self._get_vgpu_unit_names()[0]
    raw_used = juju_utils.remote_run(
        target_unit, remote_cmd=inventory_cmd, timeout=180, fatal=True)
    return int(raw_used.strip())
def unit_crm_online(unit):
    """Return the sorted numeric suffixes of nodes crm_mon shows online.

    The XML printed by ``crm_mon -X`` is parsed and every ``node`` element
    whose ``online`` attribute equals ``true`` contributes the integer
    after the last ``-`` of its ``name`` attribute.

    :param unit: unit on which ``crm_mon`` is run
    :returns: ascending list of integers
    :rtype: list
    """
    crm_xml = juju_utils.remote_run(unit, remote_cmd='crm_mon -X')
    document = xml.dom.minidom.parseString(crm_xml)
    return sorted(
        int(node.attributes['name'].value.split('-')[-1])
        for node in document.getElementsByTagName('node')
        if 'online' in node.attributes.keys()
        and node.attributes['online'].value == 'true'
    )
def create_snapshot(self, workload_id):
    """Create a snapshot and wait until it reports ``available``.

    The status check is retried with exponential backoff (capped at 30
    seconds between attempts) for up to 1200 seconds; the last failure is
    re-raised when the deadline passes.

    :param workload_id: workload ID to create snapshot from
    :type workload_id: str
    :returns: snapshot ID
    :rtype: str
    """
    # Trigger the snapshot.
    create_cmd = self.SNAPSHOT_CMD.format(auth_args=self.auth_args,
                                          workload_id=workload_id)
    juju_utils.remote_run(
        self.trilio_wlm_unit,
        remote_cmd=create_cmd,
        timeout=180,
        fatal=True,
    )

    # Look up the ID of the snapshot with a second command.
    lookup_cmd = self.SNAPSHOT_ID_CMD.format(auth_args=self.auth_args,
                                             workload_id=workload_id)
    raw_id = juju_utils.remote_run(
        self.trilio_wlm_unit,
        remote_cmd=lookup_cmd,
        timeout=180,
        fatal=True,
    )
    snapshot_id = raw_id.strip()

    wait_until_available = tenacity.Retrying(
        wait=tenacity.wait_exponential(multiplier=1, max=30),
        stop=tenacity.stop_after_delay(1200),
        reraise=True,
    )
    wait_until_available(
        _resource_reaches_status,
        self.trilio_wlm_unit,
        self.auth_args,
        self.SNAPSHOT_STATUS_CMD,
        self.SNAPSHOT_FULL_STATUS_CMD,
        snapshot_id,
        "available",
    )

    return snapshot_id
def _resource_reaches_status(unit, auth_args, status_command,
                             full_status_command, resource_id,
                             target_status):
    """Check once whether a workload resource has reached a status.

    Returns normally when the last line of the status command output
    equals ``target_status``; otherwise the full status is fetched and
    raised, which lets a retrying caller try again.

    :param unit: unit to run cli commands on
    :type unit: zaza_model.Unit
    :param auth_args: authentication arguments for command
    :type auth_args: str
    :param status_command: command to execute to get the resource status
                           that is expected to reach target_status
    :type status_command: str
    :param full_status_command: command to execute to get insights on why
                                the resource failed to reach target_status
    :type full_status_command: str
    :param resource_id: resource ID to monitor
    :type resource_id: str
    :param target_status: status to monitor for
    :type target_status: str
    :raises: Exception carrying the full status output when the resource
             is not in ``target_status``
    """
    status_output = juju_utils.remote_run(
        unit,
        remote_cmd=status_command.format(auth_args=auth_args,
                                         resource_id=resource_id),
        timeout=180,
        fatal=True,
    )
    # Only the last line of the output is treated as the status.
    resource_status = status_output.strip().split("\n")[-1]
    logging.info("Checking resource ({}) status: {}".format(
        resource_id, resource_status))

    if resource_status != target_status:
        details = juju_utils.remote_run(
            unit,
            remote_cmd=full_status_command.format(auth_args=auth_args,
                                                  resource_id=resource_id),
            timeout=180,
            fatal=True,
        ).strip()
        raise Exception("Resource not ready:\n{}".format(details))
def get_crm_leader(service, resource=None):
    """Return the numeric suffix of the node holding a crm resource.

    ``sudo crm status`` is run on every unit of ``service``; for each
    output line that matches ``resource`` (after stripping leading
    whitespace) the last whitespace-separated token is collected.

    :param service: value passed to ``mojo_utils.get_juju_units``
    :param resource: regular expression matched against the start of each
                     line; ``res_.*_vip`` is used when it is empty or None
    :type resource: str
    :returns: the text after the last ``-`` of the single collected token
    :rtype: str
    :raises: Exception unless exactly one distinct token was collected
    """
    pattern = resource or 'res_.*_vip'
    holders = set()
    for unit in mojo_utils.get_juju_units(service):
        crm_out = juju_utils.remote_run(unit, 'sudo crm status')
        stripped_lines = (raw.lstrip() for raw in crm_out.splitlines())
        holders.update(
            entry.split()[-1]
            for entry in stripped_lines
            if re.match(pattern, entry)
        )
    if len(holders) != 1:
        raise Exception('Unexpected leader count: ' + str(len(holders)))
    return holders.pop().split('-')[-1]
def oneclick_restore(self, snapshot_id):
    """Restore a snapshot and wait until the restore is ``available``.

    The status check is retried with exponential backoff (capped at 30
    seconds between attempts) for up to 720 seconds; the last failure is
    re-raised when the deadline passes.

    :param snapshot_id: snapshot ID to restore
    :type snapshot_id: str
    :returns: restore ID
    :rtype: str
    """
    # Trigger the restore.
    restore_cmd = self.ONECLICK_RESTORE_CMD.format(
        auth_args=self.auth_args, snapshot_id=snapshot_id)
    juju_utils.remote_run(
        self.trilio_wlm_unit,
        remote_cmd=restore_cmd,
        timeout=180,
        fatal=True,
    )

    # Look up the ID of the restore with a second command.
    list_cmd = self.RESTORE_LIST_CMD.format(auth_args=self.auth_args,
                                            snapshot_id=snapshot_id)
    raw_id = juju_utils.remote_run(
        self.trilio_wlm_unit,
        remote_cmd=list_cmd,
        timeout=180,
        fatal=True,
    )
    restore_id = raw_id.strip()

    wait_until_available = tenacity.Retrying(
        wait=tenacity.wait_exponential(multiplier=1, max=30),
        stop=tenacity.stop_after_delay(720),
        reraise=True,
    )
    wait_until_available(
        _resource_reaches_status,
        self.trilio_wlm_unit,
        self.auth_args,
        self.RESTORE_STATUS_CMD,
        self.RESTORE_FULL_STATUS_CMD,
        restore_id,
        "available",
    )

    return restore_id
def get_pkg_version(application, pkg):
    """Return the package version shared by all units of an application.

    For each unit the third column of the first ``dpkg -l | grep`` output
    line is taken as the version.

    :param application: Application name
    :type application: string
    :param pkg: Package name
    :type pkg: string
    :returns: the single version found across the units
    :rtype: string
    :raises: Exception if the units do not all report the same version
             (this includes the case where there are no units)
    """
    cmd = 'dpkg -l | grep {}'.format(pkg)
    versions = [
        juju_utils.remote_run(unit.entity_id, cmd).split('\n')[0].split()[2]
        for unit in model.get_units(application)
    ]
    if len(set(versions)) != 1:
        raise Exception('Unexpected output from pkg version check')
    return versions[0]
def check_crm_status(application):
    """Verify that crm reports the expected nodes online on every unit.

    Returns without checking anything when the application has no units
    or when ``crm_mon`` is not found on the first unit.

    :param application: Application name
    :type application: str
    :raises: Exception when, for some unit, the result of
             ``get_machine_numbers`` differs from what ``unit_crm_online``
             reports for that unit
    """
    juju_units = mojo_utils.get_juju_units(application)
    if not juju_units:
        return

    # Probe for crm_mon on the first unit only; the shell fallback prints
    # a marker so a missing binary shows up as plain output.
    cmd = 'which crm_mon || echo "Not Found"'
    output = juju_utils.remote_run(juju_units[0], remote_cmd=cmd)
    if output.rstrip() == "Not Found":
        return

    for unit in juju_units:
        mach_nums = get_machine_numbers(application)
        crm_online = unit_crm_online(unit)
        if mach_nums == crm_online:
            # Pass the values as logging arguments: the previous
            # '%s'-template combined with str.format() interpolated
            # nothing and logged the literal placeholders.
            logging.info('Service %s status on %s look good',
                         application, unit)
        else:
            logging.info('%s != %s', mach_nums, crm_online)
            msg = ('Mismatch on crm status for application {} '
                   'on unit {}'.format(application, unit))
            raise Exception(msg)
def _get_vgpu_resource_provider_id(self, wanted_gpu_address):
    """Return the UUID of the resource provider for a GPU PCI address.

    The provider list is fetched with the openstack CLI on the first unit
    returned by ``_get_vgpu_unit_names``. When several lines match, the
    last matching one is used.

    :param wanted_gpu_address: PCI address; ``:`` and ``.`` are replaced
                               by ``_`` before matching
    :type wanted_gpu_address: str
    :returns: first column of the matching provider line
    :rtype: str
    """
    logging.info('Querying resource providers...')
    auth_args = openstack_utils.get_cli_auth_args(self.keystone_client)
    list_cmd = (
        'openstack {} resource provider list -f value -c uuid -c name'
    ).format(auth_args)
    target_unit = self._get_vgpu_unit_names()[0]
    raw_listing = juju_utils.remote_run(
        target_unit, remote_cmd=list_cmd, timeout=180, fatal=True)
    resource_providers = raw_listing.strip().split('\n')

    # At this point resource_providers should look like
    # ['0e1379b8-7bd1-40e6-9f41-93cb5b95e38b node-sparky.maas',
    #  '1bb845a4-cf21-44c2-896e-e877760ad39b \
    #   node-sparky.maas_pci_0000_c1_00_0']
    pci_suffix = 'pci_{}'.format(
        wanted_gpu_address.replace(':', '_').replace('.', '_'))
    matching_ids = [
        provider.split()[0]
        for provider in resource_providers
        if pci_suffix in provider
    ]
    resource_provider_id = matching_ids[-1] if matching_ids else None

    self.assertIsNotNone(resource_provider_id)
    return resource_provider_id
def remote_runs(units):
    """Check that a trivial command produces output on every unit.

    :param units: iterable of unit names
    :raises: Exception naming the first unit whose ``uname -a`` run
             returned a falsy result
    """
    for unit in units:
        if juju_utils.remote_run(unit, remote_cmd='uname -a'):
            continue
        raise Exception('Juju run failed on ' + unit)
def panic_unit(unit):
    """Write ``c`` to ``/proc/sysrq-trigger`` on a unit.

    The command is run with ``fatal=False`` and a short timeout.

    :param unit: unit to run the command on
    """
    panic_cmd = 'sudo bash -c "echo c > /proc/sysrq-trigger"'
    # The timeout is a plain number of seconds, as in every other
    # remote_run call in this file; the previous value '5s' was a string.
    juju_utils.remote_run(unit, timeout=5, remote_cmd=panic_cmd,
                          fatal=False)
if line == "": continue hashsum, filename = line.split() ring_data[filename] = hashsum return ring_data def verify_ring_data(ring_data): ring_dict = next(iter(ring_data.values())) for unit in ring_data.keys(): if ring_data[unit] != ring_dict: return False return True sp_units = mojo_utils.get_juju_units('swift-proxy') ring_data = {} for unit in sp_units: cmd = 'ls -1 /etc/swift/*{.builder,.ring.gz,arse} 2>/dev/null ' \ '| xargs -l md5sum' out = juju_utils.remote_run(unit, remote_cmd=cmd) ring_data[unit] = process_ring_info(out) if verify_ring_data(ring_data): logging.info('Ring data consistent accross proxies') sys.exit(0) else: logging.error('Ring data inconsistent accross proxies') sys.exit(1)
def main(argv):
    """Round-trip a small file through a shared gluster volume.

    The volume is mounted on gluster/0, gluster/1 and gluster/2; a file is
    written on gluster/0, read back on gluster/1 and removed on gluster/2.
    The process exits with status 1 when the content read back differs
    from what was written.

    :param argv: not used by this function
    :type argv: list
    """
    mount_cmd = ('mkdir /mnt/gluster && mount -t glusterfs localhost:test '
                 '/mnt/gluster')

    # Mount the storage volume
    for gluster_unit in ('gluster/0', 'gluster/1', 'gluster/2'):
        juju_utils.remote_run(gluster_unit, remote_cmd=mount_cmd)

    # Write the test file on the first unit
    juju_utils.remote_run(
        'gluster/0',
        remote_cmd='echo 123456789 > /mnt/gluster/test_input')

    # Read it back on the second unit
    output = juju_utils.remote_run(
        'gluster/1', remote_cmd='cat /mnt/gluster/test_input')

    # Cleanup
    juju_utils.remote_run(
        'gluster/2', remote_cmd='rm /mnt/gluster/test_input')

    if output.strip() != "123456789":
        sys.exit(1)
def _install_openstack_cli_on_vgpu_units(self):
    """Install the ``openstackclients`` snap on every vGPU unit.

    Each install is run with ``fatal=True`` and a 180 second timeout.
    """
    install_cmd = 'snap install openstackclients'
    for unit_name in self._get_vgpu_unit_names():
        juju_utils.remote_run(
            unit_name, remote_cmd=install_cmd, timeout=180, fatal=True)