def _remove_compute_policy_from_vdc_async(self, *args, ovdc_id,
                                          compute_policy_href,
                                          task_resource, force=False):
    """Remove a compute policy from an org VDC and finalize the task.

    Delegates the removal to ``remove_compute_policy_from_vdc_sync`` and
    then marks the task described by ``task_resource`` as SUCCESS, or as
    ERROR if anything in that sequence raises. Exceptions raised inside
    the guarded section are not re-raised.

    :param str ovdc_id: id of the org VDC to remove the policy from
    :param str compute_policy_href: href of the policy to remove
    :param task_resource: task resource whose href, ``User`` href and
        ``Organization`` href are reused for the status updates
    :param bool force: forwarded unchanged to
        ``remove_compute_policy_from_vdc_sync``
    """
    vdc = vcd_utils.get_vdc(self._sysadmin_client,
                            vdc_id=ovdc_id,
                            is_admin_operation=True)
    task_href = task_resource.get('href')
    user_href = task_resource.User.get('href')
    org_href = task_resource.Organization.get('href')
    task = Task(client=self._sysadmin_client)

    def _report(status, operation, operation_name, **extra):
        # Every status update shares the same owner/user/org fields; only
        # status, operation text, operation name and error info vary.
        task.update(status=status.value,
                    namespace='vcloud.cse',
                    operation=operation,
                    operation_name=operation_name,
                    details='',
                    progress=None,
                    owner_href=vdc.href,
                    owner_name=vdc.name,
                    owner_type=vcd_client.EntityType.VDC.value,
                    user_href=user_href,
                    user_name=self._session.get('user'),
                    task_href=task_href,
                    org_href=org_href,
                    **extra)

    try:
        self.remove_compute_policy_from_vdc_sync(
            vdc=vdc,
            compute_policy_href=compute_policy_href,
            task_resource=task_resource,
            force=force)
        success_text = (f"Removed compute policy (href: "
                        f"{compute_policy_href}) from org VDC '{vdc.name}'")
        _report(vcd_client.TaskStatus.SUCCESS, success_text, 'Updating VDC')
    except Exception as err:
        msg = (f'Failed to remove compute policy: {compute_policy_href} '
               f'from the OVDC: {vdc.name}')
        logger.SERVER_LOGGER.error(msg)
        _report(vcd_client.TaskStatus.ERROR, msg,
                'Remove org VDC compute policy',
                error_message=f"{err}",
                stack_trace='')
def reload_templates(request_data, op_ctx):
    """Kick off a template reload and return the href of its task.

    Only callers whose client reports ``is_sysadmin`` may proceed. A
    RUNNING task owned by the caller's org is created, ``op_ctx`` is
    flagged as async and ``_reload_templates_async`` is invoked with the
    task href.

    :param request_data: not read by this function
    :param op_ctx: operation context providing the user context

    :return: dictionary with the single key 'task_href'
    :raises e.UnauthorizedRequestError: if the caller is not a sysadmin
    """
    user_ctx = op_ctx.get_user_context(api_version=None)
    client = user_ctx.client

    if not client.is_sysadmin:
        denial = 'Unauthorized to reload CSE native and TKG templates.'
        raise e.UnauthorizedRequestError(error_message=denial)

    caller_org = vcd_utils.get_org(client, user_ctx.org_name)
    caller_href = caller_org.get_user(user_ctx.name).get('href')

    tracker = Task(client)
    update_fields = dict(
        status=TaskStatus.RUNNING.value,
        namespace='vcloud.cse',
        operation="Reloading native templates.",
        operation_name='template operation',
        details='',
        progress=None,
        owner_href=user_ctx.org_href,
        owner_name=user_ctx.org_name,
        owner_type='application/vnd.vmware.vcloud.org+xml',
        user_href=caller_href,
        user_name=user_ctx.name,
        org_href=user_ctx.org_href,
    )
    task_href = tracker.update(**update_fields).get('href')

    op_ctx.is_async = True
    _reload_templates_async(op_ctx, task_href)
    return {"task_href": task_href}
def remove_vdc_compute_policy_from_vdc(
        self,
        request_context: ctx.RequestContext,
        ovdc_id,
        compute_policy_href,
        remove_compute_policy_from_vms=False):
    """Start removal of a compute policy from the given org VDC.

    Note: The VDC compute policy need not be created by CSE.

    A RUNNING task owned by the VDC is created first, the request context
    is flagged as async, and the removal itself is handed to
    ``_remove_compute_policy_from_vdc_async``.

    :param request_context: request context of remove compute policy
        request
    :param str ovdc_id: id of the vdc to remove the policy from
    :param compute_policy_href: policy href to remove
    :param bool remove_compute_policy_from_vms: If True, will set affected
        VMs' compute policy to 'System Default'

    :return: dictionary containing 'task_href'.
    """
    # TODO find an efficient way without passing in request context
    sysadmin = self._sysadmin_client
    vdc = vcd_utils.get_vdc(sysadmin, vdc_id=ovdc_id)
    org = vcd_utils.get_org(sysadmin)
    org.reload()

    user_name = self._session.get('user')
    user_href = org.get_user(user_name).get('href')

    task = Task(sysadmin)
    running_fields = dict(
        status=vcd_client.TaskStatus.RUNNING.value,
        namespace='vcloud.cse',
        operation=(f"Removing compute policy (href: {compute_policy_href})"
                   f" from org VDC (vdc id: {ovdc_id})"),
        operation_name='Remove org VDC compute policy',
        details='',
        progress=None,
        owner_href=vdc.href,
        owner_name=vdc.name,
        owner_type=vcd_client.EntityType.VDC.value,
        user_href=user_href,
        user_name=user_name,
        org_href=org.href,
    )
    task_href = task.update(**running_fields).get('href')

    request_context.is_async = True
    async_kwargs = dict(
        request_context=request_context,
        task=task,
        task_href=task_href,
        user_href=user_href,
        org_href=org.href,
        ovdc_id=ovdc_id,
        compute_policy_href=compute_policy_href,
        remove_compute_policy_from_vms=remove_compute_policy_from_vms,
    )
    self._remove_compute_policy_from_vdc_async(**async_kwargs)

    return {'task_href': task_href}
def remove_compute_policy_from_vdc(self, ovdc_id, compute_policy_href,
                                   remove_compute_policy_from_vms=False):
    """Start removal of a compute policy from the given org VDC.

    Creates a RUNNING task owned by the VDC and then hands the removal to
    ``_remove_compute_policy_from_vdc_async``.

    :param str ovdc_id: id of the vdc to remove the policy from
    :param compute_policy_href: policy href to remove
    :param bool remove_compute_policy_from_vms: If True, will set affected
        VMs' compute policy to 'System Default'

    :return: dictionary containing 'task_href'.
    """
    client = self._vcd_client
    vdc = pyvcd_utils.get_vdc(client, vdc_id=ovdc_id)

    # TODO is there no better way to get the client href?
    org = pyvcd_utils.get_org(client)
    org.reload()
    user_name = self._session.get('user')
    user_href = org.get_user(user_name).get('href')

    task = Task(client)
    operation_text = (
        f"Removing compute policy (href: {compute_policy_href})"
        f" from org VDC (vdc id: {ovdc_id})"
    )
    running_task = task.update(
        status=TaskStatus.RUNNING.value,
        namespace='vcloud.cse',
        operation=operation_text,
        operation_name='Remove org VDC compute policy',
        details='',
        progress=None,
        owner_href=vdc.href,
        owner_name=vdc.name,
        owner_type=EntityType.VDC.value,
        user_href=user_href,
        user_name=user_name,
        org_href=org.href)
    task_href = running_task.get('href')

    self._remove_compute_policy_from_vdc_async(
        task=task,
        task_href=task_href,
        user_href=user_href,
        org_href=org.href,
        ovdc_id=ovdc_id,
        compute_policy_href=compute_policy_href,
        remove_compute_policy_from_vms=remove_compute_policy_from_vms)

    response = {}
    response['task_href'] = task_href
    return response
def remove_vdc_compute_policy_from_vdc(self, ovdc_id, compute_policy_href,
                                       force=False):
    """Start removal of a compute policy from the given org VDC.

    Creates a RUNNING task owned by the VDC and passes the resulting task
    resource to ``_remove_compute_policy_from_vdc_async``.

    :param str ovdc_id: id of the vdc to remove the policy from
    :param compute_policy_href: policy href to remove
    :param bool force: If True, will set affected VMs' compute policy to
        'System Default'

    :return: dictionary containing 'task_href'.
    """
    sysadmin = self._sysadmin_client
    vdc = vcd_utils.get_vdc(sysadmin, vdc_id=ovdc_id)

    # TODO the following org will be associated with 'System' org.
    # task created should be associated with the corresponding org of the
    # vdc object.
    org = vcd_utils.get_org(sysadmin)
    org.reload()
    user_name = self._session.get('user')
    user_href = org.get_user(user_name).get('href')

    operation_text = (
        f"Removing compute policy (href: {compute_policy_href})"
        f" from org VDC (vdc id: {ovdc_id})"
    )
    task_resource = Task(sysadmin).update(
        status=vcd_client.TaskStatus.RUNNING.value,
        namespace='vcloud.cse',
        operation=operation_text,
        operation_name='Remove org VDC compute policy',
        details='',
        progress=None,
        owner_href=vdc.href,
        owner_name=vdc.name,
        owner_type=vcd_client.EntityType.VDC.value,
        user_href=user_href,
        user_name=user_name,
        org_href=org.href)

    self._remove_compute_policy_from_vdc_async(
        ovdc_id=ovdc_id,
        compute_policy_href=compute_policy_href,
        task_resource=task_resource,
        force=force)

    return {'task_href': task_resource.get('href')}
class DefaultBroker(threading.Thread):
    """Broker that serves cluster requests against vCloud Director.

    The request-facing methods (``create_cluster``, ``delete_cluster``)
    validate the request, open a tenant and a sysadmin client, create a
    RUNNING task and then start this object as a daemon thread; ``run``
    dispatches to the matching ``*_thread`` method based on ``self.op``.
    """

    def __init__(self, config):
        """Store the configuration and the values under config['vcd']."""
        threading.Thread.__init__(self)
        self.config = config
        self.host = config['vcd']['host']
        self.username = config['vcd']['username']
        self.password = config['vcd']['password']
        self.version = config['vcd']['api_version']
        self.verify = config['vcd']['verify']
        self.log = config['vcd']['log']

    def _connect_sysadmin(self):
        """Create ``self.client_sysadmin`` logged in to the 'System' org."""
        if not self.verify:
            LOGGER.warning('InsecureRequestWarning: '
                           'Unverified HTTPS request is being made. '
                           'Adding certificate verification is strongly '
                           'advised.')
            requests.packages.urllib3.disable_warnings()
        self.client_sysadmin = Client(uri=self.host,
                                      api_version=self.version,
                                      verify_ssl_certs=self.verify,
                                      log_file='sysadmin.log',
                                      log_headers=True,
                                      log_bodies=True)
        self.client_sysadmin.set_credentials(
            BasicLoginCredentials(self.username, 'System', self.password))

    def _connect_tenant(self, headers):
        """Create ``self.client_tenant`` from the request's auth token.

        The API version is taken from the text following 'version=' in
        the 'Accept' header.

        :param headers: request headers; 'x-vcloud-authorization' and
            'Accept' are read.

        :return: dict with keys 'user_name', 'user_id', 'org_name' and
            'org_href' describing the rehydrated session.
        """
        token = headers.get('x-vcloud-authorization')
        accept_header = headers.get('Accept')
        version = accept_header.split('version=')[1]
        self.client_tenant = Client(uri=self.host,
                                    api_version=version,
                                    verify_ssl_certs=self.verify,
                                    log_file='tenant.log',
                                    log_headers=True,
                                    log_bodies=True)
        session = self.client_tenant.rehydrate_from_token(token)
        return {
            'user_name': session.get('user'),
            'user_id': session.get('userId'),
            'org_name': session.get('org'),
            'org_href': self.client_tenant._get_wk_endpoint(
                _WellKnownEndpoint.LOGGED_IN_ORG)
        }

    def update_task(self, status, operation, message=None, error_message=None):
        """Create or update the task tracking the current operation.

        The first call creates the task (no href is known yet); later
        calls pass the href of the previously returned resource, which is
        kept in ``self.t``.

        :param status: object whose ``.value`` is sent as the task status
        :param operation: operation identifier; also the key into
            ``OP_MESSAGE`` when ``message`` is None
        :param str message: text describing the current step
        :param str error_message: error text forwarded to the task
        """
        if not hasattr(self, 'task'):
            self.task = Task(self.client_sysadmin)
        if message is None:
            message = OP_MESSAGE[operation]
        if hasattr(self, 't'):
            task_href = self.t.get('href')
        else:
            task_href = None
        self.t = self.task.update(status.value,
                                  'vcloud.cse',
                                  message,
                                  operation,
                                  '',
                                  None,
                                  'urn:cse:cluster:%s' % self.cluster_id,
                                  self.cluster_name,
                                  'application/vcloud.cse.cluster+xml',
                                  self.tenant_info['user_id'],
                                  self.tenant_info['user_name'],
                                  org_href=self.tenant_info['org_href'],
                                  task_href=task_href,
                                  error_message=error_message)

    def is_valid_name(self, name):
        """Validate the cluster name against the host name pattern.

        A name is accepted when it is non-empty, no longer than
        ``MAX_HOST_NAME_LENGTH`` and every dot-separated label is 1-63
        characters of letters, digits or '-', not starting or ending with
        '-'. One trailing dot is ignored.

        :param str name: candidate cluster name

        :return: True if the name is valid, False otherwise.
        """
        # An empty name used to raise IndexError on name[-1].
        if not name or len(name) > MAX_HOST_NAME_LENGTH:
            return False
        if name[-1] == '.':
            name = name[:-1]
        # Raw string so that \d is not treated as a string escape.
        allowed = re.compile(r"(?!-)[A-Z\d-]{1,63}(?<!-)$", re.IGNORECASE)
        return all(allowed.match(x) for x in name.split("."))

    def get_template(self, name=None):
        """Return the template configuration with the given name.

        When ``name`` is None, the request body's 'template' value is
        used if present and not None, otherwise the configured default
        template.

        :raises Exception: if no configured template has that name.
        """
        if name is None:
            if 'template' in self.body and self.body['template'] is not None:
                name = self.body['template']
            else:
                name = self.config['broker']['default_template']
        for template in self.config['broker']['templates']:
            if template['name'] == name:
                return template
        raise Exception('Template %s not found' % name)

    def run(self):
        """Thread entry point: run the worker matching ``self.op``."""
        LOGGER.debug('thread started op=%s' % self.op)
        if self.op == OP_CREATE_CLUSTER:
            self.create_cluster_thread()
        elif self.op == OP_DELETE_CLUSTER:
            self.delete_cluster_thread()

    def list_clusters(self, headers, body):
        """Return the clusters visible to the tenant.

        :return: dict with 'body' (list of clusters) and 'status_code';
            on failure 'body' is empty and 'message' holds the traceback.
        """
        result = {}
        try:
            result['body'] = []
            result['status_code'] = OK
            self._connect_tenant(headers)
            clusters = load_from_metadata(self.client_tenant,
                                          get_leader_ip=True)
            result['body'] = clusters
        except Exception:
            LOGGER.error(traceback.format_exc())
            result['body'] = []
            result['status_code'] = INTERNAL_SERVER_ERROR
            result['message'] = traceback.format_exc()
        return result

    def create_cluster(self, headers, body):
        """Validate a create request and start the creation thread.

        :param headers: request headers used to connect as the tenant
        :param dict body: request body; 'name', 'vdc', 'node_count' and
            'storage_profile' are read here

        :return: dict with 'body' and 'status_code'. On success the body
            holds 'name', 'cluster_id' and 'task_href' and the status is
            ACCEPTED; otherwise the body holds a 'message'.
        """
        result = {}
        result['body'] = {}
        cluster_name = body['name']
        vdc_name = body['vdc']
        node_count = body['node_count']
        LOGGER.debug('about to create cluster %s on %s with %s nodes, sp=%s',
                     cluster_name, vdc_name, node_count,
                     body['storage_profile'])
        result['body'] = {'message': 'can\'t create cluster %s' % cluster_name}
        result['status_code'] = INTERNAL_SERVER_ERROR
        try:
            if not self.is_valid_name(cluster_name):
                raise Exception('Invalid cluster name')
            self.tenant_info = self._connect_tenant(headers)
            self.headers = headers
            self.body = body
            self.cluster_name = cluster_name
            self.cluster_id = str(uuid.uuid4())
            self.op = OP_CREATE_CLUSTER
            self._connect_sysadmin()
            self.update_task(TaskStatus.RUNNING,
                             self.op,
                             message='Creating cluster %s(%s)' %
                             (cluster_name, self.cluster_id))
            self.daemon = True
            self.start()
            response_body = {}
            response_body['name'] = self.cluster_name
            response_body['cluster_id'] = self.cluster_id
            response_body['task_href'] = self.t.get('href')
            result['body'] = response_body
            result['status_code'] = ACCEPTED
        except Exception as e:
            # Plain exceptions have no 'message' attribute on Python 3;
            # fall back to str(e) the same way delete_cluster does.
            result['body'] = {'message': getattr(e, 'message', str(e))}
            LOGGER.error(traceback.format_exc())
        return result

    def create_cluster_thread(self):
        """Create the cluster vApp, its master node and worker nodes.

        Progress and the final SUCCESS/ERROR state are reported through
        ``update_task``; exceptions are logged and not propagated.
        """
        network_name = self.body['network']
        try:
            clusters = load_from_metadata(self.client_tenant,
                                          name=self.cluster_name)
            if len(clusters) != 0:
                raise Exception('Cluster already exists.')
            org_resource = self.client_tenant.get_org()
            org = Org(self.client_tenant, resource=org_resource)
            vdc_resource = org.get_vdc(self.body['vdc'])
            vdc = VDC(self.client_tenant, resource=vdc_resource)
            template = self.get_template()
            self.update_task(TaskStatus.RUNNING,
                             self.op,
                             message='Creating cluster vApp %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp_resource = vdc.create_vapp(self.cluster_name,
                                            description='cluster %s' %
                                            self.cluster_name,
                                            network=network_name,
                                            fence_mode='bridged')
            t = self.client_tenant.get_task_monitor().wait_for_status(
                task=vapp_resource.Tasks.Task[0],
                timeout=60,
                poll_frequency=2,
                fail_on_status=None,
                expected_target_statuses=[
                    TaskStatus.SUCCESS, TaskStatus.ABORTED, TaskStatus.ERROR,
                    TaskStatus.CANCELED
                ],
                callback=None)
            # Explicit check instead of assert: asserts are stripped
            # under -O and produce an empty error message in the task.
            vapp_status = t.get('status').lower()
            if vapp_status != TaskStatus.SUCCESS.value:
                raise Exception('Creation of cluster vApp %s ended with '
                                'status: %s' %
                                (self.cluster_name, vapp_status))
            tags = {}
            tags['cse.cluster.id'] = self.cluster_id
            tags['cse.version'] = pkg_resources.require(
                'container-service-extension')[0].version
            tags['cse.template'] = template['name']
            vapp = VApp(self.client_tenant, href=vapp_resource.get('href'))
            for k, v in tags.items():
                t = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
                self.client_tenant.get_task_monitor().\
                    wait_for_status(
                        task=t,
                        timeout=600,
                        poll_frequency=5,
                        fail_on_status=None,
                        expected_target_statuses=[TaskStatus.SUCCESS],
                        callback=None)
            self.update_task(TaskStatus.RUNNING,
                             self.op,
                             message='Creating master node for %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()
            add_nodes(1,
                      template,
                      TYPE_MASTER,
                      self.config,
                      self.client_tenant,
                      org,
                      vdc,
                      vapp,
                      self.body,
                      wait=True)
            self.update_task(TaskStatus.RUNNING,
                             self.op,
                             message='Initializing cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()
            init_cluster(self.config, vapp, template)
            master_ip = get_master_ip(self.config, vapp, template)
            t = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                  master_ip)
            self.client_tenant.get_task_monitor().\
                wait_for_status(
                    task=t,
                    timeout=600,
                    poll_frequency=5,
                    fail_on_status=None,
                    expected_target_statuses=[TaskStatus.SUCCESS],
                    callback=None)
            if self.body['node_count'] > 0:
                self.update_task(TaskStatus.RUNNING,
                                 self.op,
                                 message='Creating %s node(s) for %s(%s)' %
                                 (self.body['node_count'],
                                  self.cluster_name, self.cluster_id))
                add_nodes(self.body['node_count'],
                          template,
                          TYPE_NODE,
                          self.config,
                          self.client_tenant,
                          org,
                          vdc,
                          vapp,
                          self.body,
                          wait=True)
                self.update_task(TaskStatus.RUNNING,
                                 self.op,
                                 message='Adding %s node(s) to %s(%s)' %
                                 (self.body['node_count'],
                                  self.cluster_name, self.cluster_id))
                vapp.reload()
                join_cluster(self.config, vapp, template)
            self.update_task(TaskStatus.SUCCESS,
                             self.op,
                             message='Created cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, self.op, error_message=str(e))

    def delete_cluster(self, headers, body):
        """Validate a delete request and start the deletion thread.

        :param headers: request headers used to connect as the tenant
        :param dict body: request body; 'name' is read

        :return: dict with 'body' and 'status_code'. On success the body
            holds 'cluster_name' and 'task_href' and the status is
            ACCEPTED; otherwise the body holds a 'message'.
        """
        result = {}
        result['body'] = {}
        LOGGER.debug('about to delete cluster with name: %s' % body['name'])
        result['status_code'] = INTERNAL_SERVER_ERROR
        try:
            self.cluster_name = body['name']
            self.tenant_info = self._connect_tenant(headers)
            self.headers = headers
            self.body = body
            self.op = OP_DELETE_CLUSTER
            self._connect_sysadmin()
            clusters = load_from_metadata(self.client_tenant,
                                          name=self.cluster_name)
            if len(clusters) != 1:
                raise Exception('Cluster %s not found.' % self.cluster_name)
            self.cluster = clusters[0]
            self.cluster_id = self.cluster['cluster_id']
            self.update_task(TaskStatus.RUNNING,
                             self.op,
                             message='Deleting cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            self.daemon = True
            self.start()
            response_body = {}
            response_body['cluster_name'] = self.cluster_name
            response_body['task_href'] = self.t.get('href')
            result['body'] = response_body
            result['status_code'] = ACCEPTED
        except Exception as e:
            if hasattr(e, 'message'):
                result['body'] = {'message': e.message}
            else:
                result['body'] = {'message': str(e)}
            LOGGER.error(traceback.format_exc())
        return result

    def delete_cluster_thread(self):
        """Delete the cluster vApp and report the outcome on the task.

        Exceptions are logged and reported as an ERROR task status; they
        are not propagated.
        """
        LOGGER.debug('about to delete cluster with name: %s',
                     self.cluster_name)
        try:
            vdc = VDC(self.client_tenant, href=self.cluster['vdc_href'])
            delete_task = vdc.delete_vapp(self.cluster['name'], force=True)
            self.client_tenant.get_task_monitor().\
                wait_for_status(
                    task=delete_task,
                    timeout=600,
                    poll_frequency=5,
                    fail_on_status=None,
                    expected_target_statuses=[TaskStatus.SUCCESS],
                    callback=None)
            self.update_task(TaskStatus.SUCCESS,
                             self.op,
                             message='Deleted cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            # update_task takes (status, operation, ...); the previous
            # call passed the cluster name/id first and raised TypeError
            # here, so the task never reached the ERROR state.
            self.update_task(TaskStatus.ERROR, self.op, error_message=str(e))

    def get_cluster_config(self, cluster_name, headers):
        """Return the configuration of the named cluster.

        :return: dict with 'body' and 'status_code'; on failure 'body' is
            the error text and the status is INTERNAL_SERVER_ERROR.
        """
        result = {}
        try:
            self._connect_tenant(headers)
            clusters = load_from_metadata(self.client_tenant,
                                          name=cluster_name)
            if len(clusters) != 1:
                raise Exception('Cluster \'%s\' not found' % cluster_name)
            vapp = VApp(self.client_tenant, href=clusters[0]['vapp_href'])
            template = self.get_template(name=clusters[0]['template'])
            result['body'] = get_cluster_config(self.config, vapp,
                                                template['admin_password'])
            result['status_code'] = OK
        except Exception as e:
            result['body'] = str(e)
            result['status_code'] = INTERNAL_SERVER_ERROR
        return result
class DefaultBroker(threading.Thread):
    """Broker that serves cluster and node requests against vCD.

    The request-facing methods (``create_cluster``, ``delete_cluster``,
    ``create_nodes``, ``delete_nodes``) validate the request, open a
    tenant and a sysadmin client, create a RUNNING task and then start
    this object as a daemon thread; ``run`` dispatches to the matching
    ``*_thread`` method based on ``self.op``.
    """

    def __init__(self, config):
        """Store the configuration and the values under config['vcd']."""
        threading.Thread.__init__(self)
        self.config = config
        self.host = config['vcd']['host']
        self.username = config['vcd']['username']
        self.password = config['vcd']['password']
        self.version = config['vcd']['api_version']
        self.verify = config['vcd']['verify']
        self.log = config['vcd']['log']

    def _connect_sysadmin(self):
        """Create ``self.client_sysadmin`` logged in to the 'System' org."""
        if not self.verify:
            LOGGER.warning('InsecureRequestWarning: '
                           'Unverified HTTPS request is being made. '
                           'Adding certificate verification is strongly '
                           'advised.')
            requests.packages.urllib3.disable_warnings()
        self.client_sysadmin = Client(uri=self.host,
                                      api_version=self.version,
                                      verify_ssl_certs=self.verify,
                                      log_headers=True,
                                      log_bodies=True)
        self.client_sysadmin.set_credentials(
            BasicLoginCredentials(self.username, 'System', self.password))

    def _connect_tenant(self, headers):
        """Create ``self.client_tenant`` from the request's auth token.

        The API version is taken from the text following 'version=' in
        the 'Accept' header.

        :param headers: request headers; 'x-vcloud-authorization' and
            'Accept' are read.

        :return: dict with keys 'user_name', 'user_id', 'org_name' and
            'org_href' describing the rehydrated session.
        """
        token = headers.get('x-vcloud-authorization')
        accept_header = headers.get('Accept')
        version = accept_header.split('version=')[1]
        self.client_tenant = Client(uri=self.host,
                                    api_version=version,
                                    verify_ssl_certs=self.verify,
                                    log_headers=True,
                                    log_bodies=True)
        session = self.client_tenant.rehydrate_from_token(token)
        return {
            'user_name': session.get('user'),
            'user_id': session.get('userId'),
            'org_name': session.get('org'),
            'org_href': self.client_tenant._get_wk_endpoint(
                _WellKnownEndpoint.LOGGED_IN_ORG)
        }

    def _to_message(self, e):
        """Wrap an exception's text in a {'message': ...} dict.

        Uses ``e.message`` when the exception has that attribute,
        otherwise ``str(e)``.
        """
        if hasattr(e, 'message'):
            return {'message': e.message}
        else:
            return {'message': str(e)}

    def update_task(self, status, message=None, error_message=None):
        """Create or update the task tracking the current operation.

        The first call creates the task (no href is known yet); later
        calls pass the href of the previously returned resource, which is
        kept in ``self.task_resource``.

        :param status: object whose ``.value`` is sent as the task status
        :param str message: text describing the current step; defaults to
            ``OP_MESSAGE[self.op]``
        :param str error_message: error text forwarded to the task
        """
        if not hasattr(self, 'task'):
            self.task = Task(self.client_sysadmin)
        if message is None:
            message = OP_MESSAGE[self.op]
        if hasattr(self, 'task_resource'):
            task_href = self.task_resource.get('href')
        else:
            task_href = None
        self.task_resource = self.task.update(
            status.value,
            'vcloud.cse',
            message,
            self.op,
            '',
            None,
            'urn:cse:cluster:%s' % self.cluster_id,
            self.cluster_name,
            'application/vcloud.cse.cluster+xml',
            self.tenant_info['user_id'],
            self.tenant_info['user_name'],
            org_href=self.tenant_info['org_href'],
            task_href=task_href,
            error_message=error_message)

    def is_valid_name(self, name):
        """Validate the cluster name against the host name pattern.

        A name is accepted when it is non-empty, no longer than
        ``MAX_HOST_NAME_LENGTH`` and every dot-separated label is 1-63
        characters of letters, digits or '-', not starting or ending with
        '-'. One trailing dot is ignored.

        :param str name: candidate cluster name

        :return: True if the name is valid, False otherwise.
        """
        # An empty name used to raise IndexError on name[-1].
        if not name or len(name) > MAX_HOST_NAME_LENGTH:
            return False
        if name[-1] == '.':
            name = name[:-1]
        # Raw string so that \d is not treated as a string escape.
        allowed = re.compile(r"(?!-)[A-Z\d-]{1,63}(?<!-)$", re.IGNORECASE)
        return all(allowed.match(x) for x in name.split("."))

    def get_template(self, name=None):
        """Return the template configuration with the given name.

        When ``name`` is None, the request body's 'template' value is
        used if present and not None, otherwise the configured default
        template.

        :raises Exception: if no configured template has that name.
        """
        if name is None:
            if 'template' in self.body and self.body['template'] is not None:
                name = self.body['template']
            else:
                name = self.config['broker']['default_template']
        for template in self.config['broker']['templates']:
            if template['name'] == name:
                return template
        raise Exception('Template %s not found' % name)

    def run(self):
        """Thread entry point: run the worker matching ``self.op``."""
        LOGGER.debug('thread started op=%s' % self.op)
        if self.op == OP_CREATE_CLUSTER:
            self.create_cluster_thread()
        elif self.op == OP_DELETE_CLUSTER:
            self.delete_cluster_thread()
        elif self.op == OP_CREATE_NODES:
            self.create_nodes_thread()
        elif self.op == OP_DELETE_NODES:
            self.delete_nodes_thread()

    def list_clusters(self, headers, body):
        """Return the clusters visible to the tenant.

        :return: dict with 'body' (list of clusters) and 'status_code';
            on failure 'body' is empty and 'message' holds the traceback.
        """
        result = {}
        try:
            result['body'] = []
            result['status_code'] = OK
            self._connect_tenant(headers)
            clusters = load_from_metadata(self.client_tenant)
            result['body'] = clusters
        except Exception:
            LOGGER.error(traceback.format_exc())
            result['body'] = []
            result['status_code'] = INTERNAL_SERVER_ERROR
            result['message'] = traceback.format_exc()
        return result

    def get_cluster_info(self, name, headers, body):
        """Return the named cluster with per-VM node details attached.

        Each VM of the cluster vApp whose name starts with TYPE_MASTER or
        TYPE_NODE is appended to the cluster's 'master_nodes' or 'nodes'
        list respectively.

        :return: dict with 'body' and 'status_code'; on failure 'body' is
            an empty list and 'message' holds the error text.
        """
        result = {}
        try:
            result['body'] = []
            result['status_code'] = OK
            self._connect_tenant(headers)
            clusters = load_from_metadata(self.client_tenant, name=name)
            if len(clusters) == 0:
                raise Exception('Cluster \'%s\' not found.' % name)
            vapp = VApp(self.client_tenant, href=clusters[0]['vapp_href'])
            vms = vapp.get_all_vms()
            for vm in vms:
                node_info = {
                    'name': vm.get('name'),
                    'numberOfCpus': '',
                    'memoryMB': '',
                    'status': VCLOUD_STATUS_MAP.get(int(vm.get('status'))),
                    'ipAddress': ''
                }
                if hasattr(vm, 'VmSpecSection'):
                    node_info['numberOfCpus'] = vm.VmSpecSection.NumCpus.text
                    node_info[
                        'memoryMB'] = \
                        vm.VmSpecSection.MemoryResourceMb.Configured.text
                try:
                    node_info['ipAddress'] = vapp.get_primary_ip(
                        vm.get('name'))
                except Exception:
                    # Best effort: the node is still listed, without IP.
                    LOGGER.debug('cannot get ip address for node %s' %
                                 vm.get('name'))
                if vm.get('name').startswith(TYPE_MASTER):
                    node_info['node_type'] = 'master'
                    clusters[0].get('master_nodes').append(node_info)
                elif vm.get('name').startswith(TYPE_NODE):
                    node_info['node_type'] = 'node'
                    clusters[0].get('nodes').append(node_info)
            result['body'] = clusters[0]
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            result['body'] = []
            result['status_code'] = INTERNAL_SERVER_ERROR
            result['message'] = str(e)
        return result

    def create_cluster(self, headers, body):
        """Validate a create request and start the creation thread.

        :param headers: request headers used to connect as the tenant
        :param dict body: request body; 'name', 'vdc', 'node_count' and
            'storage_profile' are read here

        :return: dict with 'body' and 'status_code'. On success the body
            holds 'name', 'cluster_id' and 'task_href' and the status is
            ACCEPTED; otherwise the body holds a 'message'.
        """
        result = {}
        result['body'] = {}
        cluster_name = body['name']
        vdc_name = body['vdc']
        node_count = body['node_count']
        LOGGER.debug('about to create cluster %s on %s with %s nodes, sp=%s',
                     cluster_name, vdc_name, node_count,
                     body['storage_profile'])
        result['body'] = {
            'message': 'can\'t create cluster \'%s\'' % cluster_name
        }
        result['status_code'] = INTERNAL_SERVER_ERROR
        try:
            if not self.is_valid_name(cluster_name):
                raise Exception('Invalid cluster name')
            self.tenant_info = self._connect_tenant(headers)
            self.headers = headers
            self.body = body
            self.cluster_name = cluster_name
            self.cluster_id = str(uuid.uuid4())
            self.op = OP_CREATE_CLUSTER
            self._connect_sysadmin()
            self.update_task(TaskStatus.RUNNING,
                             message='Creating cluster %s(%s)' %
                             (cluster_name, self.cluster_id))
            self.daemon = True
            self.start()
            response_body = {}
            response_body['name'] = self.cluster_name
            response_body['cluster_id'] = self.cluster_id
            response_body['task_href'] = self.task_resource.get('href')
            result['body'] = response_body
            result['status_code'] = ACCEPTED
        except Exception as e:
            result['body'] = self._to_message(e)
            LOGGER.error(traceback.format_exc())
        return result

    def create_cluster_thread(self):
        """Create the cluster vApp, its master node and worker nodes.

        Progress and the final SUCCESS/ERROR state are reported through
        ``update_task``; exceptions are logged and not propagated.
        """
        network_name = self.body['network']
        try:
            clusters = load_from_metadata(self.client_tenant,
                                          name=self.cluster_name)
            if len(clusters) != 0:
                raise Exception('Cluster already exists.')
            org_resource = self.client_tenant.get_org()
            org = Org(self.client_tenant, resource=org_resource)
            vdc_resource = org.get_vdc(self.body['vdc'])
            vdc = VDC(self.client_tenant, resource=vdc_resource)
            template = self.get_template()
            self.update_task(TaskStatus.RUNNING,
                             message='Creating cluster vApp %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp_resource = vdc.create_vapp(self.cluster_name,
                                            description='cluster %s' %
                                            self.cluster_name,
                                            network=network_name,
                                            fence_mode='bridged')
            self.client_tenant.get_task_monitor().wait_for_status(
                vapp_resource.Tasks.Task[0])
            tags = {}
            tags['cse.cluster.id'] = self.cluster_id
            tags['cse.version'] = pkg_resources.require(
                'container-service-extension')[0].version
            tags['cse.template'] = template['name']
            vapp = VApp(self.client_tenant, href=vapp_resource.get('href'))
            for k, v in tags.items():
                task = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
                self.client_tenant.get_task_monitor().wait_for_status(task)
            self.update_task(TaskStatus.RUNNING,
                             message='Creating master node for %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()
            add_nodes(1, template, TYPE_MASTER, self.config,
                      self.client_tenant, org, vdc, vapp, self.body)
            self.update_task(TaskStatus.RUNNING,
                             message='Initializing cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()
            init_cluster(self.config, vapp, template)
            master_ip = get_master_ip(self.config, vapp, template)
            task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                     master_ip)
            self.client_tenant.get_task_monitor().wait_for_status(task)
            if self.body['node_count'] > 0:
                self.update_task(TaskStatus.RUNNING,
                                 message='Creating %s node(s) for %s(%s)' %
                                 (self.body['node_count'],
                                  self.cluster_name, self.cluster_id))
                add_nodes(self.body['node_count'], template, TYPE_NODE,
                          self.config, self.client_tenant, org, vdc, vapp,
                          self.body)
                self.update_task(TaskStatus.RUNNING,
                                 message='Adding %s node(s) to %s(%s)' %
                                 (self.body['node_count'],
                                  self.cluster_name, self.cluster_id))
                vapp.reload()
                join_cluster(self.config, vapp, template)
            self.update_task(TaskStatus.SUCCESS,
                             message='Created cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(e))

    def delete_cluster(self, headers, body):
        """Validate a delete request and start the deletion thread.

        :param headers: request headers used to connect as the tenant
        :param dict body: request body; 'name' is read

        :return: dict with 'body' and 'status_code'. On success the body
            holds 'cluster_name' and 'task_href' and the status is
            ACCEPTED; otherwise the body holds a 'message'.
        """
        result = {}
        result['body'] = {}
        LOGGER.debug('about to delete cluster with name: %s' % body['name'])
        result['status_code'] = INTERNAL_SERVER_ERROR
        try:
            self.cluster_name = body['name']
            self.tenant_info = self._connect_tenant(headers)
            self.headers = headers
            self.body = body
            self.op = OP_DELETE_CLUSTER
            self._connect_sysadmin()
            clusters = load_from_metadata(self.client_tenant,
                                          name=self.cluster_name)
            if len(clusters) != 1:
                raise Exception('Cluster %s not found.' % self.cluster_name)
            self.cluster = clusters[0]
            self.cluster_id = self.cluster['cluster_id']
            self.update_task(TaskStatus.RUNNING,
                             message='Deleting cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            self.daemon = True
            self.start()
            response_body = {}
            response_body['cluster_name'] = self.cluster_name
            response_body['task_href'] = self.task_resource.get('href')
            result['body'] = response_body
            result['status_code'] = ACCEPTED
        except Exception as e:
            result['body'] = self._to_message(e)
            LOGGER.error(traceback.format_exc())
        return result

    def delete_cluster_thread(self):
        """Delete the cluster vApp and report the outcome on the task.

        Exceptions are logged and reported as an ERROR task status; they
        are not propagated.
        """
        LOGGER.debug('about to delete cluster with name: %s',
                     self.cluster_name)
        try:
            vdc = VDC(self.client_tenant, href=self.cluster['vdc_href'])
            task = vdc.delete_vapp(self.cluster['name'], force=True)
            self.client_tenant.get_task_monitor().wait_for_status(task)
            self.update_task(TaskStatus.SUCCESS,
                             message='Deleted cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(e))

    def get_cluster_config(self, cluster_name, headers):
        """Return the configuration of the named cluster.

        :return: dict with 'body' and 'status_code'; on failure 'body' is
            a {'message': ...} dict and the status is
            INTERNAL_SERVER_ERROR.
        """
        result = {}
        try:
            self._connect_tenant(headers)
            clusters = load_from_metadata(self.client_tenant,
                                          name=cluster_name)
            if len(clusters) != 1:
                raise Exception('Cluster \'%s\' not found' % cluster_name)
            vapp = VApp(self.client_tenant, href=clusters[0]['vapp_href'])
            template = self.get_template(name=clusters[0]['template'])
            result['body'] = get_cluster_config(self.config, vapp,
                                                template['admin_password'])
            result['status_code'] = OK
        except Exception as e:
            result['body'] = self._to_message(e)
            result['status_code'] = INTERNAL_SERVER_ERROR
        return result

    def create_nodes(self, headers, body):
        """Validate an add-nodes request and start the worker thread.

        :param headers: request headers used to connect as the tenant
        :param dict body: request body; 'name', 'node_count', 'vdc' and
            'storage_profile' are read here

        :return: dict with 'body' and 'status_code'. On success the body
            holds 'cluster_name' and 'task_href' and the status is
            ACCEPTED; otherwise the body holds a 'message'.
        """
        result = {'body': {}}
        self.cluster_name = body['name']
        LOGGER.debug('about to add %s nodes to cluster %s on VDC %s, sp=%s',
                     body['node_count'], self.cluster_name, body['vdc'],
                     body['storage_profile'])
        result['status_code'] = INTERNAL_SERVER_ERROR
        try:
            if body['node_count'] < 1:
                raise Exception('Invalid node count: %s.' %
                                body['node_count'])
            self.tenant_info = self._connect_tenant(headers)
            clusters = load_from_metadata(self.client_tenant,
                                          name=self.cluster_name)
            if len(clusters) != 1:
                raise Exception('Cluster \'%s\' not found.' %
                                self.cluster_name)
            self.cluster = clusters[0]
            self.headers = headers
            self.body = body
            self.op = OP_CREATE_NODES
            self._connect_sysadmin()
            self.cluster_id = self.cluster['cluster_id']
            self.update_task(
                TaskStatus.RUNNING,
                message='Adding %s node(s) to cluster %s(%s)' %
                (body['node_count'], self.cluster_name, self.cluster_id))
            self.daemon = True
            self.start()
            response_body = {}
            response_body['cluster_name'] = self.cluster_name
            response_body['task_href'] = self.task_resource.get('href')
            result['body'] = response_body
            result['status_code'] = ACCEPTED
        except Exception as e:
            result['body'] = self._to_message(e)
            LOGGER.error(traceback.format_exc())
        return result

    def create_nodes_thread(self):
        """Create the requested nodes and join them to the cluster.

        Exceptions are logged and reported as an ERROR task status; they
        are not propagated.
        """
        LOGGER.debug('about to add nodes to cluster with name: %s',
                     self.cluster_name)
        try:
            org_resource = self.client_tenant.get_org()
            org = Org(self.client_tenant, resource=org_resource)
            vdc = VDC(self.client_tenant, href=self.cluster['vdc_href'])
            vapp = VApp(self.client_tenant, href=self.cluster['vapp_href'])
            template = self.get_template()
            self.update_task(
                TaskStatus.RUNNING,
                message='Creating %s node(s) for %s(%s)' %
                (self.body['node_count'], self.cluster_name,
                 self.cluster_id))
            new_nodes = add_nodes(self.body['node_count'], template,
                                  TYPE_NODE, self.config, self.client_tenant,
                                  org, vdc, vapp, self.body)
            self.update_task(
                TaskStatus.RUNNING,
                message='Adding %s node(s) to %s(%s)' %
                (self.body['node_count'], self.cluster_name,
                 self.cluster_id))
            # Only the VMs created by this request are joined.
            target_nodes = []
            for spec in new_nodes['specs']:
                target_nodes.append(spec['target_vm_name'])
            vapp.reload()
            join_cluster(self.config, vapp, template, target_nodes)
            self.update_task(
                TaskStatus.SUCCESS,
                message='Added %s node(s) to cluster %s(%s)' %
                (self.body['node_count'], self.cluster_name,
                 self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(e))

    def delete_nodes(self, headers, body):
        """Validate a delete-nodes request and start the worker thread.

        Requests with an empty node list, or naming a node that starts
        with TYPE_MASTER, are rejected.

        :param headers: request headers used to connect as the tenant
        :param dict body: request body; 'name' and 'nodes' are read here

        :return: dict with 'body' and 'status_code'. On success the body
            holds 'cluster_name' and 'task_href' and the status is
            ACCEPTED; otherwise the body holds a 'message'.
        """
        result = {'body': {}}
        self.cluster_name = body['name']
        LOGGER.debug('about to delete nodes from cluster with name: %s' %
                     body['name'])
        result['status_code'] = INTERNAL_SERVER_ERROR
        try:
            if len(body['nodes']) < 1:
                raise Exception('Invalid list of nodes: %s.' % body['nodes'])
            for node in body['nodes']:
                if node.startswith(TYPE_MASTER):
                    raise Exception('Can\'t delete a master node: \'%s\'.' %
                                    node)
            self.tenant_info = self._connect_tenant(headers)
            clusters = load_from_metadata(self.client_tenant,
                                          name=self.cluster_name)
            if len(clusters) != 1:
                raise Exception('Cluster \'%s\' not found.' %
                                self.cluster_name)
            self.cluster = clusters[0]
            self.headers = headers
            self.body = body
            self.op = OP_DELETE_NODES
            self._connect_sysadmin()
            self.cluster_id = self.cluster['cluster_id']
            self.update_task(
                TaskStatus.RUNNING,
                message='Deleting %s node(s) from cluster %s(%s)' %
                (len(body['nodes']), self.cluster_name, self.cluster_id))
            self.daemon = True
            self.start()
            response_body = {}
            response_body['cluster_name'] = self.cluster_name
            response_body['task_href'] = self.task_resource.get('href')
            result['body'] = response_body
            result['status_code'] = ACCEPTED
        except Exception as e:
            result['body'] = self._to_message(e)
            LOGGER.error(traceback.format_exc())
        return result

    def delete_nodes_thread(self):
        """Remove the requested nodes from the cluster and delete the VMs.

        Undeploy failures for individual VMs are logged as warnings and
        do not stop the deletion. Other exceptions are logged and
        reported as an ERROR task status; they are not propagated.
        """
        LOGGER.debug('about to delete nodes from cluster with name: %s',
                     self.cluster_name)
        try:
            vapp = VApp(self.client_tenant, href=self.cluster['vapp_href'])
            template = self.get_template()
            self.update_task(
                TaskStatus.RUNNING,
                message='Deleting %s node(s) from %s(%s)' %
                (len(self.body['nodes']), self.cluster_name,
                 self.cluster_id))
            delete_nodes_from_cluster(self.config, vapp, template,
                                      self.body['nodes'], self.body['force'])
            self.update_task(
                TaskStatus.RUNNING,
                message='Undeploying %s node(s) for %s(%s)' %
                (len(self.body['nodes']), self.cluster_name,
                 self.cluster_id))
            for vm_name in self.body['nodes']:
                vm = VM(self.client_tenant, resource=vapp.get_vm(vm_name))
                try:
                    task = vm.undeploy()
                    self.client_tenant.get_task_monitor().wait_for_status(task)
                except Exception:
                    # Deliberate best effort: the VM is deleted below
                    # whether or not the undeploy succeeded.
                    LOGGER.warning('couldn\'t undeploy VM %s' % vm_name)
            self.update_task(
                TaskStatus.RUNNING,
                message='Deleting %s VM(s) for %s(%s)' %
                (len(self.body['nodes']), self.cluster_name,
                 self.cluster_id))
            task = vapp.delete_vms(self.body['nodes'])
            self.client_tenant.get_task_monitor().wait_for_status(task)
            self.update_task(
                TaskStatus.SUCCESS,
                message='Deleted %s node(s) to cluster %s(%s)' %
                (len(self.body['nodes']), self.cluster_name,
                 self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(e))
def _update_ovdc_using_placement_policy_async(operation_context: ctx.OperationContext,  # noqa: E501
                                              task: vcd_task.Task,
                                              task_href,
                                              user_href,
                                              policy_list,
                                              ovdc_id,
                                              vdc,
                                              remove_cp_from_vms_on_disable=False):  # noqa: E501
    """Reconcile the CSE placement policies on an org VDC with policy_list.

    The placement policies currently present on the VDC are compared (by
    their ``display_name``) with ``policy_list``: policies named in
    ``policy_list`` but absent from the VDC are added, and policies present
    on the VDC but absent from ``policy_list`` are removed. Progress, the
    final success and any failure are all reported on ``task``; exceptions
    are caught, logged and recorded on the task rather than re-raised.
    Enable/disable telemetry is recorded only when there is something to
    add/remove respectively.

    :param ctx.OperationContext operation_context: operation context object
    :param vcd_task.Task task: Task resource to track progress
    :param str task_href: href of the task
    :param str user_href: href of the user on whose behalf the task is
        updated
    :param List[str] policy_list: The new list of policies associated with
        the ovdc
    :param str ovdc_id: id of the org VDC being updated
    :param pyvcloud.vcd.vdc.VDC vdc: VDC object
    :param bool remove_cp_from_vms_on_disable: Set to true if placement
        policies need to be removed from the vms before removing from the VDC.
    """
    operation_name = "Update OVDC with placement policies"
    # Initialised before the try block so that the except clause can always
    # tell which telemetry events (if any) were already opened.
    k8s_runtimes_added = ''
    k8s_runtimes_deleted = ''

    def _report_progress(status, operation, details='', **extra):
        """Post one update to the tracking task and return its resource."""
        return task.update(status=status.value,
                           namespace='vcloud.cse',
                           operation=operation,
                           operation_name=operation_name,
                           details=details,
                           progress=None,
                           owner_href=vdc.href,
                           owner_name=vdc.name,
                           owner_type=vcd_client.EntityType.VDC.value,
                           user_href=user_href,
                           user_name=operation_context.user.name,
                           task_href=task_href,
                           org_href=operation_context.user.org_href,
                           **extra)

    try:
        config = utils.get_server_runtime_config()
        log_wire = utils.str_to_bool(config.get('service', {}).get('log_wire'))
        cpm = compute_policy_manager.ComputePolicyManager(
            operation_context.sysadmin_client, log_wire=log_wire)
        existing_policies = [
            cse_policy['display_name']
            for cse_policy in
            compute_policy_manager.list_cse_placement_policies_on_vdc(cpm, ovdc_id)  # noqa: E501
        ]

        logger.SERVER_LOGGER.debug(policy_list)
        logger.SERVER_LOGGER.debug(existing_policies)
        policies_to_add = set(policy_list) - set(existing_policies)
        policies_to_delete = set(existing_policies) - set(policy_list)

        # Telemetry for 'vcd cse ovdc enable' command
        # TODO: Update telemetry request to handle 'k8s_runtime' array
        k8s_runtimes_added = ','.join(policies_to_add)
        if k8s_runtimes_added:
            cse_params = {
                RequestKey.K8S_PROVIDER: k8s_runtimes_added,
                RequestKey.OVDC_ID: ovdc_id,
            }
            telemetry_handler.record_user_action_details(
                cse_operation=CseOperation.OVDC_ENABLE,
                cse_params=cse_params)

        # Telemetry for 'vcd cse ovdc disable' command
        # TODO: Update telemetry request to handle 'k8s_runtime' array
        # Comma-separated, matching the separator used for the enable payload.
        k8s_runtimes_deleted = ','.join(policies_to_delete)
        if k8s_runtimes_deleted:
            cse_params = {
                RequestKey.K8S_PROVIDER: k8s_runtimes_deleted,
                RequestKey.OVDC_ID: ovdc_id,
                RequestKey.REMOVE_COMPUTE_POLICY_FROM_VMS: remove_cp_from_vms_on_disable  # noqa: E501
            }
            telemetry_handler.record_user_action_details(
                cse_operation=CseOperation.OVDC_DISABLE,
                cse_params=cse_params)

        for cp_name in policies_to_add:
            msg = f"Adding k8s provider {cp_name} to OVDC {vdc.name}"
            logger.SERVER_LOGGER.debug(msg)
            _report_progress(vcd_client.TaskStatus.RUNNING, msg)
            policy = compute_policy_manager.get_cse_vdc_compute_policy(
                cpm, cp_name, is_placement_policy=True)
            cpm.add_compute_policy_to_vdc(vdc_id=ovdc_id,
                                          compute_policy_href=policy['href'])

        for cp_name in policies_to_delete:
            msg = f"Removing k8s provider {RUNTIME_INTERNAL_NAME_TO_DISPLAY_NAME_MAP[cp_name]} from OVDC {ovdc_id}"  # noqa: E501
            logger.SERVER_LOGGER.debug(msg)
            # The returned task resource is handed to the removal routine so
            # that it reports on this same task instead of creating its own.
            task_resource = _report_progress(vcd_client.TaskStatus.RUNNING,
                                             msg)
            policy = compute_policy_manager.get_cse_vdc_compute_policy(
                cpm, cp_name, is_placement_policy=True)
            cpm.remove_compute_policy_from_vdc_sync(
                vdc=vdc,
                compute_policy_href=policy['href'],
                force=remove_cp_from_vms_on_disable,
                is_placement_policy=True,
                task_resource=task_resource)

        msg = f"Successfully updated OVDC: {vdc.name}"
        logger.SERVER_LOGGER.debug(msg)
        _report_progress(vcd_client.TaskStatus.SUCCESS,
                         "Operation success",
                         details=msg)

        # Record telemetry
        if k8s_runtimes_added:
            telemetry_handler.record_user_action(
                CseOperation.OVDC_ENABLE,
                status=OperationStatus.SUCCESS)
        if k8s_runtimes_deleted:
            telemetry_handler.record_user_action(
                CseOperation.OVDC_DISABLE,
                status=OperationStatus.SUCCESS)
    except Exception as err:
        # Record telemetry
        if k8s_runtimes_added:
            telemetry_handler.record_user_action(
                CseOperation.OVDC_ENABLE,
                status=OperationStatus.FAILED)
        if k8s_runtimes_deleted:
            telemetry_handler.record_user_action(
                CseOperation.OVDC_DISABLE,
                status=OperationStatus.FAILED)
        # Keep the traceback in the server log; the task only carries the
        # message text.
        logger.SERVER_LOGGER.error(err, exc_info=True)
        _report_progress(vcd_client.TaskStatus.ERROR,
                         'Failed to update OVDC',
                         details=f'Failed with error: {err}',
                         error_message=f"{err}")
    finally:
        if operation_context.sysadmin_client:
            operation_context.end()
def remove_compute_policy_from_vdc_sync(self,
                                        vdc,
                                        compute_policy_href,
                                        force=False,
                                        is_placement_policy=False,
                                        task_resource=None):
    """Remove compute policy from vdc.

    This method makes use of an umbrella task which can be used for tracking
    progress. If the umbrella task is not specified, it is created.

    When an umbrella task is supplied, only RUNNING updates are posted to it
    and its terminal status is left to the caller. When the task is created
    here, it is moved to SUCCESS once the policy has been removed from the
    VDC, or to ERROR if anything fails. In both cases the failure is logged
    and re-raised.

    :param pyvcloud.vcd.vdc.VDC vdc: VDC object
    :param str compute_policy_href: href of the compute policy to remove
    :param bool force: Force remove compute policy from vms in the VDC
        as well
    :param bool is_placement_policy: If True, VMs are matched on their
        placement policy id and the placement policy is removed from them;
        otherwise VMs are matched on their sizing policy id and switched to
        the system default policy. Only consulted when ``force`` is True.
    :param lxml.objectify.Element task_resource: Task resource for
        the umbrella task

    :raises Exception: whatever error occurred while detaching the policy
        from the VMs or removing it from the VDC.
    """
    user_name = self._session.get('user')
    default_operation_name = 'Remove org VDC compute policy'

    task = Task(self._sysadmin_client)
    is_umbrella_task = task_resource is not None
    if is_umbrella_task:
        user_href = task_resource.User.get('href')
        org_href = task_resource.Organization.get('href')
    else:
        # Create a task if not umbrella task
        # TODO the following org will be associated with 'System' org.
        # task created should be associated with the corresponding org of
        # the vdc object.
        org = vcd_utils.get_org(self._sysadmin_client)
        org.reload()
        user_href = org.get_user(user_name).get('href')
        org_href = org.href
        task_resource = task.update(
            status=vcd_client.TaskStatus.RUNNING.value,
            namespace='vcloud.cse',
            operation=f"Removing compute policy (href: {compute_policy_href})"  # noqa: E501
                      f" from org VDC (vdc id: {vdc.name})",
            operation_name=default_operation_name,
            details='',
            progress=None,
            owner_href=vdc.href,
            owner_name=vdc.name,
            owner_type=vcd_client.EntityType.VDC.value,
            user_href=user_href,
            user_name=user_name,
            org_href=org_href)

    task_href = task_resource.get('href')

    def _report(status, operation,
                operation_name=default_operation_name, **extra):
        """Post one update to the tracking task."""
        return task.update(status=status,
                           namespace='vcloud.cse',
                           operation=operation,
                           operation_name=operation_name,
                           details='',
                           progress=None,
                           owner_href=vdc.href,
                           owner_name=vdc.name,
                           owner_type=vcd_client.EntityType.VDC.value,
                           user_href=user_href,
                           user_name=user_name,
                           task_href=task_href,
                           org_href=org_href,
                           **extra)

    running = vcd_client.TaskStatus.RUNNING.value
    try:
        # remove the compute policy from VMs if force is True
        if force:
            compute_policy_id = retrieve_compute_policy_id_from_href(
                compute_policy_href)
            vdc_id = vcd_utils.extract_id(vdc.get_resource().get('id'))
            vapps = vcd_utils.get_all_vapps_in_ovdc(
                client=self._sysadmin_client,
                ovdc_id=vdc_id)

            # NOTE(review): stays None when the VDC does not list the system
            # default policy; it is then passed as-is to
            # vm.update_compute_policy below - confirm that is acceptable.
            system_default_href = next(
                (cp_dict['href']
                 for cp_dict in self.list_compute_policies_on_vdc(vdc_id)
                 if cp_dict['name'] == _SYSTEM_DEFAULT_COMPUTE_POLICY),
                None)

            # Pick the VMs that currently carry the policy being removed.
            if is_placement_policy:
                policy_id_of = self._get_vm_placement_policy_id
            else:
                policy_id_of = self._get_vm_sizing_policy_id
            target_vms = [vm
                          for vapp in vapps
                          for vm in vapp.get_all_vms()
                          if policy_id_of(vm) == compute_policy_id]
            vm_names = [vm.get('name') for vm in target_vms]

            if is_placement_policy:
                operation_msg = f"Removing placement policy from " \
                                f"{len(vm_names)} VMs. " \
                                f"Affected VMs: {vm_names}"
            else:
                operation_msg = "Setting sizing policy to " \
                                f"'{_SYSTEM_DEFAULT_COMPUTE_POLICY}' on " \
                                f"{len(vm_names)} VMs. " \
                                f"Affected VMs: {vm_names}"
            _report(running, operation_msg)

            task_monitor = self._sysadmin_client.get_task_monitor()
            for vm_resource in target_vms:
                vm = VM(self._sysadmin_client,
                        href=vm_resource.get('href'))
                vm_name = vm_resource.get('name')
                if is_placement_policy:
                    if hasattr(vm_resource, 'ComputePolicy') and \
                            not hasattr(vm_resource.ComputePolicy,
                                        'VmSizingPolicy'):
                        # The VM has no sizing policy of its own: assign the
                        # system default before dropping the placement policy.
                        vm_task = vm.update_compute_policy(
                            compute_policy_href=system_default_href)
                        _report(
                            running,
                            "Setting compute policy to "
                            f"'{_SYSTEM_DEFAULT_COMPUTE_POLICY}' "
                            f"on VM '{vm_name}'",
                            operation_name=f'Setting sizing policy to {_SYSTEM_DEFAULT_COMPUTE_POLICY}')  # noqa: E501
                        task_monitor.wait_for_success(vm_task)
                    vm_task = vm.remove_placement_policy()
                    _report(running,
                            "Removing placement policy on VM "
                            f"'{vm_name}'")
                    task_monitor.wait_for_success(vm_task)
                else:
                    vm_task = vm.update_compute_policy(
                        compute_policy_href=system_default_href)
                    _report(running,
                            "Setting sizing policy to "
                            f"'{_SYSTEM_DEFAULT_COMPUTE_POLICY}' "
                            f"on VM '{vm_name}'")
                    task_monitor.wait_for_success(vm_task)

        # Announce the removal while the task is still RUNNING; a task that
        # this method owns is only marked SUCCESS after the removal has
        # actually gone through.
        _report(running,
                f"Removing compute policy (href:"
                f"{compute_policy_href}) from org VDC '{vdc.name}'")

        vdc.remove_compute_policy(compute_policy_href)

        if not is_umbrella_task:
            _report(vcd_client.TaskStatus.SUCCESS.value,
                    f"Removed compute policy (href: {compute_policy_href}) "
                    f"from org VDC '{vdc.name}'")
    except Exception as err:
        logger.SERVER_LOGGER.error(err, exc_info=True)
        # Set task to error if not an umbrella task
        if not is_umbrella_task:
            msg = 'Failed to remove compute policy: ' \
                  f'{compute_policy_href} from the OVDC: {vdc.name}'
            _report(vcd_client.TaskStatus.ERROR.value,
                    msg,
                    error_message=f"{err}",
                    stack_trace='')
        raise
class VcdBroker(AbstractBroker, threading.Thread): def __init__(self, tenant_auth_token, request_spec): super().__init__(tenant_auth_token, request_spec) threading.Thread.__init__(self) self.req_spec = request_spec self.tenant_client = None self.client_session = None self.tenant_info = None self.sys_admin_client = None self.task = None self.task_resource = None self.op = None self.cluster_name = None self.cluster_id = None self.daemon = False def _connect_sys_admin(self): self.sys_admin_client = get_sys_admin_client() def _disconnect_sys_admin(self): if self.sys_admin_client is not None: self.sys_admin_client.logout() self.sys_admin_client = None def _update_task(self, status, message=None, error_message=None, stack_trace=''): if not self.tenant_client.is_sysadmin(): stack_trace = '' if self.task is None: self.task = Task(self.sys_admin_client) if message is None: message = OP_MESSAGE[self.op] if self.task_resource is not None: task_href = self.task_resource.get('href') else: task_href = None self.task_resource = self.task.update( status=status.value, namespace='vcloud.cse', operation=message, operation_name=self.op, details='', progress=None, owner_href=f"urn:cse:cluster:{self.cluster_id}", owner_name=self.cluster_name, owner_type='application/vcloud.cse.cluster+xml', user_href=self.tenant_info['user_id'], user_name=self.tenant_info['user_name'], org_href=self.tenant_info['org_href'], task_href=task_href, error_message=error_message, stack_trace=stack_trace ) def _is_valid_name(self, name): """Validate that the cluster name against the pattern.""" if len(name) > MAX_HOST_NAME_LENGTH: return False if name[-1] == '.': name = name[:-1] allowed = re.compile(r"(?!-)[A-Z\d-]{1,63}(?<!-)$", re.IGNORECASE) return all(allowed.match(x) for x in name.split(".")) def _get_template(self, name=None): server_config = get_server_runtime_config() name = name or \ self.req_spec.get(RequestKey.TEMPLATE_NAME) or \ server_config['broker']['default_template'] for template in 
server_config['broker']['templates']: if template['name'] == name: return template raise Exception(f"Template {name} not found.") def _get_nfs_exports(self, ip, vapp, node): """Get the exports from remote NFS server (helper method). :param ip: (str): IP address of the NFS server :param vapp: (pyvcloud.vcd.vapp.VApp): The vApp or cluster to which node belongs :param node: (str): IP address of the NFS server :param node: (`lxml.objectify.StringElement`) object representing the vm resource. :return: (List): List of exports """ # TODO(right template) find a right way to retrieve # the template from which nfs node was created. server_config = get_server_runtime_config() template = server_config['broker']['templates'][0] script = f"#!/usr/bin/env bash\nshowmount -e {ip}" result = execute_script_in_nodes( server_config, vapp, template['admin_password'], script, nodes=[node], check_tools=False) lines = result[0][1].content.decode().split('\n') exports = [] for index in range(1, len(lines) - 1): export = lines[index].strip().split()[0] exports.append(export) return exports def node_rollback(self, node_list): """Rollback for node creation failure. 
:param list node_list: faulty nodes to be deleted """ LOGGER.info(f"About to rollback nodes from cluster with name: " "{self.cluster_name}") LOGGER.info(f"Node list to be deleted:{node_list}") vapp = VApp(self.tenant_client, href=self.cluster['vapp_href']) template = self._get_template() try: server_config = get_server_runtime_config() delete_nodes_from_cluster(server_config, vapp, template, node_list, force=True) except Exception: LOGGER.warning("Couldn't delete node {node_list} from cluster:" "{self.cluster_name}") for vm_name in node_list: vm = VM(self.tenant_client, resource=vapp.get_vm(vm_name)) try: vm.undeploy() except Exception: LOGGER.warning(f"Couldn't undeploy VM {vm_name}") vapp.delete_vms(node_list) LOGGER.info(f"Successfully deleted nodes: {node_list}") def cluster_rollback(self): """Rollback for cluster creation failure.""" LOGGER.info(f"About to rollback cluster with name: " "{self.cluster_name}") self._connect_tenant() clusters = load_from_metadata( self.tenant_client, name=self.cluster_name) if len(clusters) != 1: LOGGER.debug(f"Cluster {self.cluster_name} not found.") return self.cluster = clusters[0] vdc = VDC(self.tenant_client, href=self.cluster['vdc_href']) vdc.delete_vapp(self.cluster['name'], force=True) LOGGER.info(f"Successfully deleted cluster: {self.cluster_name}") def run(self): LOGGER.debug(f"Thread started for operation={self.op}") if self.op == OP_CREATE_CLUSTER: self.create_cluster_thread() elif self.op == OP_DELETE_CLUSTER: self.delete_cluster_thread() elif self.op == OP_CREATE_NODES: self.create_nodes_thread() elif self.op == OP_DELETE_NODES: self.delete_nodes_thread() def list_clusters(self): self._connect_tenant() clusters = [] raw_clusters = load_from_metadata( self.tenant_client, org_name=self.req_spec.get(RequestKey.ORG_NAME), vdc_name=self.req_spec.get(RequestKey.OVDC_NAME)) for c in raw_clusters: clusters.append({ 'name': c['name'], 'IP master': c['leader_endpoint'], 'template': c['template'], 'VMs': c['number_of_vms'], 
'vdc': c['vdc_name'], 'status': c['status'], 'vdc_id': c['vdc_id'], 'org_name': get_org_name_from_ovdc_id(c['vdc_id']), K8S_PROVIDER_KEY: K8sProviders.NATIVE }) return clusters def get_cluster_info(self, cluster_name): """Get the info of the cluster. :param cluster_name: (str): Name of the cluster :return: (dict): Info of the cluster. """ self._connect_tenant() clusters = load_from_metadata( self.tenant_client, name=cluster_name, org_name=self.req_spec.get(RequestKey.ORG_NAME), vdc_name=self.req_spec.get(RequestKey.OVDC_NAME)) if len(clusters) > 1: raise CseDuplicateClusterError(f"Multiple clusters of name" f" '{cluster_name}' detected.") if len(clusters) == 0: raise ClusterNotFoundError(f"Cluster '{cluster_name}' not found.") cluster = clusters[0] cluster[K8S_PROVIDER_KEY] = K8sProviders.NATIVE vapp = VApp(self.tenant_client, href=clusters[0]['vapp_href']) vms = vapp.get_all_vms() for vm in vms: node_info = { 'name': vm.get('name'), 'ipAddress': '' } try: node_info['ipAddress'] = vapp.get_primary_ip( vm.get('name')) except Exception: LOGGER.debug(f"Unable to get ip address of node " f"{vm.get('name')}") if vm.get('name').startswith(NodeType.MASTER): cluster.get('master_nodes').append(node_info) elif vm.get('name').startswith(NodeType.WORKER): cluster.get('nodes').append(node_info) elif vm.get('name').startswith(NodeType.NFS): cluster.get('nfs_nodes').append(node_info) return cluster def get_node_info(self, cluster_name, node_name): """Get the info of a given node in the cluster. :param cluster_name: (str): Name of the cluster :param node_name: (str): Name of the node :return: (dict): Info of the node. 
""" self._connect_tenant() clusters = load_from_metadata( self.tenant_client, name=cluster_name, org_name=self.req_spec.get(RequestKey.ORG_NAME), vdc_name=self.req_spec.get(RequestKey.OVDC_NAME)) if len(clusters) > 1: raise CseDuplicateClusterError(f"Multiple clusters of name" f" '{cluster_name}' detected.") if len(clusters) == 0: raise ClusterNotFoundError(f"Cluster '{cluster_name}' not found.") vapp = VApp(self.tenant_client, href=clusters[0]['vapp_href']) vms = vapp.get_all_vms() node_info = None for vm in vms: if (node_name == vm.get('name')): node_info = { 'name': vm.get('name'), 'numberOfCpus': '', 'memoryMB': '', 'status': VCLOUD_STATUS_MAP.get(int(vm.get('status'))), 'ipAddress': '' } if hasattr(vm, 'VmSpecSection'): node_info[ 'numberOfCpus'] = vm.VmSpecSection.NumCpus.text node_info[ 'memoryMB'] = \ vm.VmSpecSection.MemoryResourceMb.Configured.text try: node_info['ipAddress'] = vapp.get_primary_ip( vm.get('name')) except Exception: LOGGER.debug(f"Unable to get ip address of node " f"{vm.get('name')}") if vm.get('name').startswith(NodeType.MASTER): node_info['node_type'] = 'master' elif vm.get('name').startswith(NodeType.WORKER): node_info['node_type'] = 'worker' elif vm.get('name').startswith(NodeType.NFS): node_info['node_type'] = 'nfs' exports = self._get_nfs_exports(node_info['ipAddress'], vapp, vm) node_info['exports'] = exports if node_info is None: raise NodeNotFoundError(f"Node '{node_name}' not found in " f"cluster '{cluster_name}'") return node_info def get_cluster_config(self, cluster_name): self._connect_tenant() clusters = load_from_metadata( self.tenant_client, name=cluster_name, org_name=self.req_spec.get(RequestKey.ORG_NAME), vdc_name=self.req_spec.get(RequestKey.OVDC_NAME)) if len(clusters) > 1: raise CseDuplicateClusterError(f"Multiple clusters of name" f" '{cluster_name}' detected.") if len(clusters) == 0: raise ClusterNotFoundError(f"Cluster '{cluster_name}' not found.") vapp = VApp(self.tenant_client, href=clusters[0]['vapp_href']) 
template = self._get_template(name=clusters[0]['template']) server_config = get_server_runtime_config() result = get_cluster_config(server_config, vapp, template['admin_password']) return result @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME]) def create_cluster(self, cluster_name, vdc_name, node_count, storage_profile, network_name, template, **kwargs): # TODO(ClusterSpec) Create an inner class "ClusterSpec" # in abstract_broker.py and have subclasses define and use it # as instance variable. # Method 'Create_cluster' in VcdBroker and PksBroker should take # ClusterParams either as a param (or) # read from instance variable (if needed only). if not network_name: raise CseServerError(f"Cluster cannot be created. " f"Please provide a valid value for org " f"vDC network param.") LOGGER.debug(f"About to create cluster {cluster_name} on {vdc_name} " f"with {node_count} nodes, sp={storage_profile}") if not self._is_valid_name(cluster_name): raise CseServerError(f"Invalid cluster name '{cluster_name}'") self._connect_tenant() self._connect_sys_admin() self.cluster_name = cluster_name self.cluster_id = str(uuid.uuid4()) self.op = OP_CREATE_CLUSTER self._update_task( TaskStatus.RUNNING, message=f"Creating cluster {cluster_name}({self.cluster_id})") self.daemon = True self.start() result = {} result['name'] = self.cluster_name result['cluster_id'] = self.cluster_id result['task_href'] = self.task_resource.get('href') return result @rollback_on_failure def create_cluster_thread(self): network_name = self.req_spec.get(RequestKey.NETWORK_NAME) try: clusters = load_from_metadata(self.tenant_client, name=self.cluster_name) if len(clusters) != 0: raise ClusterAlreadyExistsError(f"Cluster {self.cluster_name} " "already exists.") org_resource = self.tenant_client.get_org_by_name( self.req_spec.get(RequestKey.ORG_NAME)) org = Org(self.tenant_client, resource=org_resource) vdc_resource = org.get_vdc(self.req_spec.get(RequestKey.OVDC_NAME)) vdc = VDC(self.tenant_client, 
resource=vdc_resource) template = self._get_template() self._update_task( TaskStatus.RUNNING, message=f"Creating cluster vApp {self.cluster_name}" f"({self.cluster_id})") try: vapp_resource = vdc.create_vapp( self.cluster_name, description=f"cluster {self.cluster_name}", network=network_name, fence_mode='bridged') except Exception as e: raise ClusterOperationError( "Error while creating vApp:", str(e)) self.tenant_client.get_task_monitor().wait_for_status( vapp_resource.Tasks.Task[0]) tags = {} tags['cse.cluster.id'] = self.cluster_id tags['cse.version'] = pkg_resources.require( 'container-service-extension')[0].version tags['cse.template'] = template['name'] vapp = VApp(self.tenant_client, href=vapp_resource.get('href')) for k, v in tags.items(): task = vapp.set_metadata('GENERAL', 'READWRITE', k, v) self.tenant_client.get_task_monitor().wait_for_status(task) self._update_task( TaskStatus.RUNNING, message=f"Creating master node for {self.cluster_name}" f"({self.cluster_id})") vapp.reload() server_config = get_server_runtime_config() try: add_nodes(1, template, NodeType.MASTER, server_config, self.tenant_client, org, vdc, vapp, self.req_spec) except Exception as e: raise MasterNodeCreationError( "Error while adding master node:", str(e)) self._update_task( TaskStatus.RUNNING, message=f"Initializing cluster {self.cluster_name}" f"({self.cluster_id})") vapp.reload() init_cluster(server_config, vapp, template) master_ip = get_master_ip(server_config, vapp, template) task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip', master_ip) self.tenant_client.get_task_monitor().wait_for_status(task) if self.req_spec.get(RequestKey.NUM_WORKERS) > 0: self._update_task( TaskStatus.RUNNING, message=f"Creating " f"{self.req_spec.get(RequestKey.NUM_WORKERS)} " f"node(s) for " f"{self.cluster_name}({self.cluster_id})") try: add_nodes(self.req_spec.get(RequestKey.NUM_WORKERS), template, NodeType.WORKER, server_config, self.tenant_client, org, vdc, vapp, self.req_spec) except 
Exception as e: raise WorkerNodeCreationError( "Error while creating worker node:", str(e)) self._update_task( TaskStatus.RUNNING, message=f"Adding " f"{self.req_spec.get(RequestKey.NUM_WORKERS)} " f"node(s) to " f"{self.cluster_name}({self.cluster_id})") vapp.reload() join_cluster(server_config, vapp, template) if self.req_spec.get(RequestKey.ENABLE_NFS): self._update_task( TaskStatus.RUNNING, message=f"Creating NFS node for {self.cluster_name}" f"({self.cluster_id})") try: add_nodes(1, template, NodeType.NFS, server_config, self.tenant_client, org, vdc, vapp, self.req_spec) except Exception as e: raise NFSNodeCreationError( "Error while creating NFS node:", str(e)) self._update_task( TaskStatus.SUCCESS, message=f"Created cluster {self.cluster_name}" f"({self.cluster_id})") except (MasterNodeCreationError, WorkerNodeCreationError, NFSNodeCreationError, ClusterJoiningError, ClusterInitializationError, ClusterOperationError) as e: LOGGER.error(traceback.format_exc()) error_obj = error_to_json(e) stack_trace = \ ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY]) self._update_task( TaskStatus.ERROR, error_message=error_obj[ERROR_MESSAGE_KEY] [ERROR_DESCRIPTION_KEY], stack_trace=stack_trace) raise e except Exception as e: LOGGER.error(traceback.format_exc()) error_obj = error_to_json(e) stack_trace = \ ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY]) self._update_task( TaskStatus.ERROR, error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501 stack_trace=stack_trace) finally: self._disconnect_sys_admin() @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME]) def delete_cluster(self, cluster_name): LOGGER.debug(f"About to delete cluster with name: {cluster_name}") self.cluster_name = cluster_name self._connect_tenant() self._connect_sys_admin() self.op = OP_DELETE_CLUSTER clusters = load_from_metadata( self.tenant_client, name=self.cluster_name, org_name=self.req_spec.get(RequestKey.ORG_NAME), 
vdc_name=self.req_spec.get(RequestKey.OVDC_NAME))
        # NOTE(review): everything down to the first `return result` is the
        # tail of a method whose beginning lies above this chunk; it is
        # reproduced unchanged.
        if len(clusters) > 1:
            raise CseDuplicateClusterError(
                f"Multiple clusters of name '{self.cluster_name}' detected.")
        if len(clusters) != 1:
            raise ClusterNotFoundError(
                f"Cluster {self.cluster_name} not found.")
        self.cluster = clusters[0]
        self.cluster_id = self.cluster['cluster_id']
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Deleting cluster {self.cluster_name}"
                    f"({self.cluster_id})")
        self.daemon = True
        self.start()
        result = {}
        result['cluster_name'] = self.cluster_name
        result['task_href'] = self.task_resource.get('href')
        return result

    def delete_cluster_thread(self):
        """Delete the cluster vApp and record the outcome on the task.

        Deletes the vApp named ``self.cluster['name']`` from the VDC at
        ``self.cluster['vdc_href']`` (forced), waits for the resulting task
        and marks the CSE task SUCCESS. Any exception is logged with its
        traceback and the task is marked ERROR instead of propagating.
        ``self._disconnect_sys_admin()`` is always called at the end.
        """
        LOGGER.debug(f"About to delete cluster with name: {self.cluster_name}")
        try:
            vdc = VDC(self.tenant_client, href=self.cluster['vdc_href'])
            task = vdc.delete_vapp(self.cluster['name'], force=True)
            self.tenant_client.get_task_monitor().wait_for_status(task)
            self._update_task(
                TaskStatus.SUCCESS,
                message=f"Deleted cluster {self.cluster_name}"
                        f"({self.cluster_id})")
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self._update_task(TaskStatus.ERROR, error_message=str(e))
        finally:
            self._disconnect_sys_admin()

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def create_nodes(self):
        """Validate the request and kick off the 'create nodes' operation.

        Reads the cluster name, worker count and network name from
        ``self.req_spec``, looks the cluster up with ``load_from_metadata``,
        stores it on ``self``, marks the task RUNNING and then sets
        ``self.daemon`` and calls ``self.start()`` (presumably the
        ``threading.Thread`` API -- the class header is outside this chunk).

        :return: dict with keys 'cluster_name' and 'task_href'.
        :rtype: dict

        :raises CseServerError: if the worker count is below 1 or the network
            name is None.
        :raises CseDuplicateClusterError: if more than one cluster matches.
        :raises ClusterNotFoundError: if no cluster matches.
        """
        self.cluster_name = self.req_spec.get(RequestKey.CLUSTER_NAME)
        LOGGER.debug(f"About to add "
                     f"{self.req_spec.get(RequestKey.NUM_WORKERS)} nodes to "
                     f"cluster {self.cluster_name} on VDC "
                     f"{self.req_spec.get(RequestKey.OVDC_NAME)}")
        if self.req_spec.get(RequestKey.NUM_WORKERS) < 1:
            raise CseServerError(f"Invalid node count: {self.req_spec.get(RequestKey.NUM_WORKERS)}.")  # noqa: E501
        if self.req_spec.get(RequestKey.NETWORK_NAME) is None:
            # NOTE(review): f-string without placeholders.
            raise CseServerError(f'Network name is missing from the request.')
        self._connect_tenant()
        self._connect_sys_admin()
        clusters = load_from_metadata(
            self.tenant_client,
            name=self.cluster_name,
            org_name=self.req_spec.get(RequestKey.ORG_NAME),
            vdc_name=self.req_spec.get(RequestKey.OVDC_NAME))
        if len(clusters) > 1:
            raise CseDuplicateClusterError(f"Multiple clusters of name "
                                           f"'{self.cluster_name}' detected.")
        if len(clusters) == 0:
            raise ClusterNotFoundError(
                f"Cluster '{self.cluster_name}' not found.")
        self.cluster = clusters[0]
        # presumably selects which *_thread method runs after start();
        # verify against the run() implementation
        self.op = OP_CREATE_NODES
        self.cluster_id = self.cluster['cluster_id']
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Adding {self.req_spec.get(RequestKey.NUM_WORKERS)} "
                    f"node(s) to cluster "
                    f"{self.cluster_name}({self.cluster_id})")
        self.daemon = True
        self.start()
        result = {}
        result['cluster_name'] = self.cluster_name
        result['task_href'] = self.task_resource.get('href')
        return result

    @rollback_on_failure
    def create_nodes_thread(self):
        """Create the requested nodes and, for workers, join them to the cluster.

        The node type is NFS when ``RequestKey.ENABLE_NFS`` is truthy in
        ``self.req_spec`` and WORKER otherwise. NFS nodes are only created;
        worker nodes are additionally passed to ``join_cluster``. The task is
        marked SUCCESS at the end of either path.

        A ``NodeCreationError`` marks the task ERROR and is re-raised (so the
        ``rollback_on_failure`` decorator can observe it); any other exception
        marks the task ERROR and is swallowed.
        ``self._disconnect_sys_admin()`` is always called at the end.
        """
        LOGGER.debug(f"About to add nodes to cluster with name: "
                     f"{self.cluster_name}")
        try:
            server_config = get_server_runtime_config()
            org_resource = self.tenant_client.get_org()
            org = Org(self.tenant_client, resource=org_resource)
            vdc = VDC(self.tenant_client, href=self.cluster['vdc_href'])
            vapp = VApp(self.tenant_client, href=self.cluster['vapp_href'])
            template = self._get_template()
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating {self.req_spec.get(RequestKey.NUM_WORKERS)}"
                        f" node(s) for {self.cluster_name}({self.cluster_id})")
            node_type = NodeType.WORKER
            if self.req_spec.get(RequestKey.ENABLE_NFS):
                node_type = NodeType.NFS
            new_nodes = add_nodes(self.req_spec.get(RequestKey.NUM_WORKERS),
                                  template, node_type, server_config,
                                  self.tenant_client, org, vdc, vapp,
                                  self.req_spec)
            if node_type == NodeType.NFS:
                self._update_task(
                    TaskStatus.SUCCESS,
                    message=f"Created "
                            f"{self.req_spec.get(RequestKey.NUM_WORKERS)} "
                            f"node(s) for "
                            f"{self.cluster_name}({self.cluster_id})")
            elif node_type == NodeType.WORKER:
                self._update_task(
                    TaskStatus.RUNNING,
                    message=f"Adding "
                            f"{self.req_spec.get(RequestKey.NUM_WORKERS)} "
                            f"node(s) to cluster "
                            f"{self.cluster_name}({self.cluster_id})")
                # only the freshly created VMs are joined to the cluster
                target_nodes = []
                for spec in new_nodes['specs']:
                    target_nodes.append(spec['target_vm_name'])
                vapp.reload()
                join_cluster(server_config, vapp, template, target_nodes)
                self._update_task(
                    TaskStatus.SUCCESS,
                    message=f"Added "
                            f"{self.req_spec.get(RequestKey.NUM_WORKERS)} "
                            f"node(s) to cluster "
                            f"{self.cluster_name}({self.cluster_id})")
        except NodeCreationError as e:
            error_obj = error_to_json(e)
            LOGGER.error(traceback.format_exc())
            stack_trace = \
                ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
                stack_trace=stack_trace)
            # re-raised, unlike the generic handler below
            raise
        except Exception as e:
            error_obj = error_to_json(e)
            LOGGER.error(traceback.format_exc())
            stack_trace = \
                ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
                stack_trace=stack_trace)
        finally:
            self._disconnect_sys_admin()

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def delete_nodes(self):
        """Validate the request and kick off the 'delete nodes' operation.

        Rejects an empty node list and any node whose name starts with
        ``NodeType.MASTER``, looks the cluster up with
        ``load_from_metadata``, stores it on ``self``, marks the task RUNNING
        and then sets ``self.daemon`` and calls ``self.start()``.

        :return: dict with keys 'cluster_name' and 'task_href'.
        :rtype: dict

        :raises CseServerError: if the node list is empty, contains a master
            node, or no cluster matches.
        :raises CseDuplicateClusterError: if more than one cluster matches.
        """
        # NOTE(review): this initial value is never returned; `result` is
        # reassigned before the only `return`.
        result = {'body': {}}
        self.cluster_name = self.req_spec.get(RequestKey.CLUSTER_NAME)
        LOGGER.debug(f"About to delete nodes from cluster with name: "
                     f"{self.req_spec.get(RequestKey.CLUSTER_NAME)}")
        if len(self.req_spec.get(RequestKey.NODE_NAMES_LIST)) < 1:
            raise CseServerError(f"Invalid list of nodes: {self.req_spec.get(RequestKey.NODE_NAMES_LIST)}.")  # noqa: E501
        for node in self.req_spec.get(RequestKey.NODE_NAMES_LIST):
            if node.startswith(NodeType.MASTER):
                raise CseServerError(f"Can't delete a master node: '{node}'.")
        self._connect_tenant()
        self._connect_sys_admin()
        clusters = load_from_metadata(
            self.tenant_client,
            name=self.cluster_name,
            org_name=self.req_spec.get(RequestKey.ORG_NAME),
            vdc_name=self.req_spec.get(RequestKey.OVDC_NAME))
        if len(clusters) <= 0:
            raise CseServerError(f"Cluster '{self.cluster_name}' not found.")
        if len(clusters) > 1:
            raise CseDuplicateClusterError(f"Multiple clusters of name "
                                           f"'{self.cluster_name}' detected.")
        self.cluster = clusters[0]
        self.op = OP_DELETE_NODES
        self.cluster_id = self.cluster['cluster_id']
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Deleting "
                    f"{len(self.req_spec.get(RequestKey.NODE_NAMES_LIST))} "
                    f"node(s) from cluster "
                    f"{self.cluster_name}({self.cluster_id})")
        self.daemon = True
        self.start()
        result = {
            'cluster_name': self.cluster_name,
            'task_href': self.task_resource.get('href')
        }
        return result

    def delete_nodes_thread(self):
        """Remove the requested nodes from the cluster and delete their VMs.

        Steps, each reported on the task: ``delete_nodes_from_cluster`` for
        the node names, undeploy every VM, then delete the VMs from the vApp.
        Failures of ``delete_nodes_from_cluster`` and of individual undeploys
        are logged and do not stop the operation. Any other exception marks
        the task ERROR. ``self._disconnect_sys_admin()`` is always called at
        the end.
        """
        LOGGER.debug(f"About to delete nodes from cluster with name: "
                     f"{self.cluster_name}")
        try:
            vapp = VApp(self.tenant_client, href=self.cluster['vapp_href'])
            template = self._get_template()
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Deleting "
                        f"{len(self.req_spec.get(RequestKey.NODE_NAMES_LIST))}"
                        f" node(s) from "
                        f"{self.cluster_name}({self.cluster_id})")
            # best effort: a failure here is logged and the VMs are still
            # undeployed and deleted below
            try:
                server_config = get_server_runtime_config()
                delete_nodes_from_cluster(
                    server_config,
                    vapp,
                    template,
                    self.req_spec.get(RequestKey.NODE_NAMES_LIST),
                    self.req_spec.get(RequestKey.FORCE_DELETE))
            except Exception:
                LOGGER.error(f"Couldn't delete node "
                             f"{self.req_spec.get(RequestKey.NODE_NAMES_LIST)}"
                             f" from cluster:{self.cluster_name}")
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Undeploying "
                        f"{len(self.req_spec.get(RequestKey.NODE_NAMES_LIST))}"
                        f" node(s) for {self.cluster_name}({self.cluster_id})")
            for vm_name in self.req_spec.get(RequestKey.NODE_NAMES_LIST):
                vm = VM(self.tenant_client, resource=vapp.get_vm(vm_name))
                try:
                    task = vm.undeploy()
                    self.tenant_client.get_task_monitor().wait_for_status(task)
                except Exception:
                    LOGGER.warning(f"Couldn't undeploy VM {vm_name}")
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Deleting "
                        f"{len(self.req_spec.get(RequestKey.NODE_NAMES_LIST))}"
                        f" VM(s) for {self.cluster_name}({self.cluster_id})")
            task = vapp.delete_vms(self.req_spec.get(RequestKey.NODE_NAMES_LIST))  # noqa: E501
            self.tenant_client.get_task_monitor().wait_for_status(task)
            # NOTE(review): the message says "to cluster" although nodes were
            # removed; "from cluster" looks intended.
            self._update_task(
                TaskStatus.SUCCESS,
                message=f"Deleted "
                        f"{len(self.req_spec.get(RequestKey.NODE_NAMES_LIST))}"
                        f" node(s) to cluster "
                        f"{self.cluster_name}({self.cluster_id})")
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            stack_trace = \
                ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
                stack_trace=stack_trace)
        finally:
            self._disconnect_sys_admin()

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def resize_cluster(self, cluster_name, node_count, curr_cluster_info=None):
        """Resize the cluster of a given name to given number of worker nodes.

        Only scaling up is supported: the difference between ``node_count``
        and the current worker count is written to
        ``self.req_spec[RequestKey.NUM_WORKERS]`` and ``create_nodes()`` is
        called.

        :param str cluster_name: Name of the cluster
        :param int node_count: New size of the worker nodes
            (should be greater than the current number).
        :param dict curr_cluster_info: Current properties of the cluster. If
            falsy, the properties are fetched with ``get_cluster_info``.

        :return response: response returned by create_nodes()

        :rtype: dict

        :raises CseServerError: if ``node_count`` is less than or equal to the
            current number of worker nodes.
        """
        if curr_cluster_info:
            curr_worker_count = len(curr_cluster_info['nodes'])
        else:
            cluster = self.get_cluster_info(cluster_name=cluster_name)
            curr_worker_count = len(cluster['nodes'])
        if curr_worker_count > node_count:
            raise CseServerError(f"Automatic scale down is not supported for "
                                 f"vCD powered Kubernetes clusters. Use "
                                 f"'vcd cse delete node' command.")
        elif curr_worker_count == node_count:
            raise CseServerError(f"Cluster - {cluster_name} is already at the "
                                 f"size of {curr_worker_count}.")
        # create_nodes() reads the number of nodes to add from req_spec
        self.req_spec[RequestKey.NUM_WORKERS] = node_count - curr_worker_count
        response = self.create_nodes()
        return response
class VcdBroker(AbstractBroker):
    """Handles cluster operations for 'native' k8s provider."""

    def __init__(self, tenant_auth_token):
        """Initialize the broker for the tenant identified by the auth token.

        The tenant attributes are first set to None and then, per the inline
        comment below, populated by ``AbstractBroker.__init__``.

        :param tenant_auth_token: token forwarded to
            ``AbstractBroker.__init__``.
        """
        self.tenant_client = None
        self.client_session = None
        self.tenant_user_name = None
        self.tenant_user_id = None
        self.tenant_org_name = None
        self.tenant_org_href = None
        # populates above attributes
        super().__init__(tenant_auth_token)

        self._sys_admin_client = None  # private: use sys_admin_client property
        self.task = None  # Task helper, created lazily by _update_task()
        self.task_resource = None  # result of the latest Task.update() call

    @property
    def sys_admin_client(self):
        """Return the sys admin client, creating it on first access."""
        if self._sys_admin_client is None:
            self._sys_admin_client = vcd_utils.get_sys_admin_client()
        return self._sys_admin_client

    def logout_sys_admin_client(self):
        """Log out the cached sys admin client (if any) and forget it."""
        if self._sys_admin_client is not None:
            self._sys_admin_client.logout()
        self._sys_admin_client = None

    def get_cluster_info(self, data):
        """Get cluster metadata as well as node data.

        Common broker function that validates data for the 'cluster info'
        operation and returns cluster/node metadata as dictionary.

        Required data: cluster_name
        Optional data and default values: org_name=None, ovdc_name=None

        :param dict data: request data.

        :return: the dict returned by ``get_cluster`` with
            ``K8S_PROVIDER_KEY`` set to native and one
            ``{'name', 'ipAddress'}`` entry per VM appended to its
            'master_nodes', 'nodes' or 'nfs_nodes' list.
        :rtype: dict
        """
        required = [
            RequestKey.CLUSTER_NAME
        ]
        utils.ensure_keys_in_dict(required, data, dict_name='data')
        defaults = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None
        }
        validated_data = {**defaults, **data}
        cluster_name = validated_data[RequestKey.CLUSTER_NAME]

        cluster = get_cluster(self.tenant_client, cluster_name,
                              org_name=validated_data[RequestKey.ORG_NAME],
                              ovdc_name=validated_data[RequestKey.OVDC_NAME])
        cluster[K8S_PROVIDER_KEY] = K8sProvider.NATIVE
        vapp = VApp(self.tenant_client, href=cluster['vapp_href'])
        vms = vapp.get_all_vms()
        for vm in vms:
            node_info = {
                'name': vm.get('name'),
                'ipAddress': ''
            }
            # the IP stays '' when it cannot be determined
            try:
                node_info['ipAddress'] = vapp.get_primary_ip(vm.get('name'))
            except Exception:
                LOGGER.debug(f"Unable to get ip address of node "
                             f"{vm.get('name')}")
            # assumes get_cluster() returns these three keys as lists --
            # TODO confirm
            if vm.get('name').startswith(NodeType.MASTER):
                cluster.get('master_nodes').append(node_info)
            elif vm.get('name').startswith(NodeType.WORKER):
                cluster.get('nodes').append(node_info)
            elif vm.get('name').startswith(NodeType.NFS):
                cluster.get('nfs_nodes').append(node_info)
        return cluster

    def list_clusters(self, data):
        """List all native clusters and their relevant metadata.

        Common broker function that validates data for the 'list clusters'
        operation and returns a list of cluster data.

        Optional data and default values: org_name=None, ovdc_name=None

        :param dict data: request data.

        :return: one summary dict per cluster returned by
            ``get_all_clusters``.
        :rtype: list
        """
        defaults = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None
        }
        validated_data = {**defaults, **data}

        raw_clusters = get_all_clusters(
            self.tenant_client,
            org_name=validated_data[RequestKey.ORG_NAME],
            ovdc_name=validated_data[RequestKey.OVDC_NAME])

        clusters = []
        for c in raw_clusters:
            # template/k8s keys are read with .get() and may be None; the
            # remaining keys are required
            clusters.append({
                'name': c['name'],
                'IP master': c['leader_endpoint'],
                'template_name': c.get('template_name'),
                'template_revision': c.get('template_revision'),
                'k8s_version': c.get('k8s_version'),
                'VMs': c['number_of_vms'],
                'vdc': c['vdc_name'],
                'status': c['status'],
                'vdc_id': c['vdc_id'],
                'org_name': vcd_utils.get_org_name_from_ovdc_id(c['vdc_id']),
                K8S_PROVIDER_KEY: K8sProvider.NATIVE
            })
        return clusters

    def get_cluster_config(self, data):
        """Get the cluster's kube config contents.

        Common broker function that validates data for 'cluster config'
        operation and returns the cluster's kube config file contents
        as a string.

        Required data: cluster_name
        Optional data and default values: org_name=None, ovdc_name=None

        :param dict data: request data.

        :return: decoded contents of '/root/.kube/config' downloaded from the
            first master node.
        :rtype: str

        :raises ClusterOperationError: if no file was downloaded or the first
            download did not return an OK status code.
        """
        required = [
            RequestKey.CLUSTER_NAME
        ]
        utils.ensure_keys_in_dict(required, data, dict_name='data')
        defaults = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None
        }
        validated_data = {**defaults, **data}
        cluster_name = validated_data[RequestKey.CLUSTER_NAME]

        cluster = get_cluster(self.tenant_client, cluster_name,
                              org_name=validated_data[RequestKey.ORG_NAME],
                              ovdc_name=validated_data[RequestKey.OVDC_NAME])
        vapp = VApp(self.tenant_client, href=cluster['vapp_href'])
        node_names = get_node_names(vapp, NodeType.MASTER)

        all_results = []
        try:
            for node_name in node_names:
                LOGGER.debug(f"getting file from node {node_name}")
                password = vapp.get_admin_password(node_name)
                vs = vs_utils.get_vsphere(self.sys_admin_client, vapp,
                                          vm_name=node_name, logger=LOGGER)
                vs.connect()
                moid = vapp.get_vm_moid(node_name)
                vm = vs.get_vm_by_moid(moid)
                filename = '/root/.kube/config'
                result = vs.download_file_from_guest(vm, 'root', password,
                                                     filename)
                all_results.append(result)
        finally:
            # the sys admin client is only needed for the vSphere access above
            self.logout_sys_admin_client()

        # only the first master's result is inspected and returned
        if len(all_results) == 0 or all_results[0].status_code != requests.codes.ok:  # noqa: E501
            raise ClusterOperationError("Couldn't get cluster configuration")
        return all_results[0].content.decode()

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def create_cluster(self, data):
        """Start the cluster creation operation.

        Common broker function that validates data for the 'create cluster'
        operation and returns a dictionary with cluster detail and task
        information. Calls the asynchronous cluster create function that
        actually performs the work. The returned `result['task_href']` can
        be polled to get updates on task progress.

        Required data: cluster_name, org_name, ovdc_name, network_name
        Optional data and default values: num_nodes=2, num_cpu=None,
            mb_memory=None, storage_profile_name=None, ssh_key_filepath=None,
            template_name=default, template_revision=default, enable_nfs=False,
            rollback=True

        :param dict data: request data.

        :return: dict with keys 'name', 'cluster_id' and 'task_href'.
        :rtype: dict

        :raises CseServerError: if the cluster name is invalid or the worker
            node count is below 1.
        :raises ClusterAlreadyExistsError: if a cluster with this name is
            found.
        """
        required = [
            RequestKey.CLUSTER_NAME,
            RequestKey.ORG_NAME,
            RequestKey.OVDC_NAME,
            RequestKey.NETWORK_NAME
        ]
        utils.ensure_keys_in_dict(required, data, dict_name='data')
        cluster_name = data[RequestKey.CLUSTER_NAME]
        # check that cluster name is syntactically valid
        if not is_valid_cluster_name(cluster_name):
            raise CseServerError(f"Invalid cluster name '{cluster_name}'")
        # check that cluster name doesn't already exist
        try:
            get_cluster(self.tenant_client, cluster_name,
                        org_name=data[RequestKey.ORG_NAME],
                        ovdc_name=data[RequestKey.OVDC_NAME])
            raise ClusterAlreadyExistsError(f"Cluster {cluster_name} "
                                            f"already exists.")
        except ClusterNotFoundError:
            pass
        # check that requested/default template is valid
        template = get_template(
            name=data.get(RequestKey.TEMPLATE_NAME),
            revision=data.get(RequestKey.TEMPLATE_REVISION))
        defaults = {
            RequestKey.NUM_WORKERS: 2,
            RequestKey.NUM_CPU: None,
            RequestKey.MB_MEMORY: None,
            RequestKey.STORAGE_PROFILE_NAME: None,
            RequestKey.SSH_KEY_FILEPATH: None,
            RequestKey.TEMPLATE_NAME: template[LocalTemplateKey.NAME],
            RequestKey.TEMPLATE_REVISION: template[LocalTemplateKey.REVISION],
            RequestKey.ENABLE_NFS: False,
            RequestKey.ROLLBACK: True,
        }
        validated_data = {**defaults, **data}

        # TODO HACK default dictionary combining needs to be fixed
        # (a key present in `data` with a falsy value overrides the default
        # in the merge above, hence the explicit `or` fallback)
        validated_data[RequestKey.TEMPLATE_NAME] = validated_data[RequestKey.TEMPLATE_NAME] or template[LocalTemplateKey.NAME]  # noqa: E501
        validated_data[RequestKey.TEMPLATE_REVISION] = validated_data[RequestKey.TEMPLATE_REVISION] or template[LocalTemplateKey.REVISION]  # noqa: E501
        template_name = validated_data[RequestKey.TEMPLATE_NAME]
        template_revision = validated_data[RequestKey.TEMPLATE_REVISION]

        # check that requested number of worker nodes is at least 1
        num_workers = validated_data[RequestKey.NUM_WORKERS]
        if num_workers < 1:
            raise CseServerError(f"Worker node count must be > 0 "
                                 f"(received {num_workers}).")

        cluster_id = str(uuid.uuid4())
        # must _update_task or else self.task_resource is None
        # do not logout of sys admin, or else in pyvcloud's session.request()
        # call, session becomes None
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Creating cluster vApp '{cluster_name}' ({cluster_id})"
                    f" from template '{template_name}' "
                    f"(revision {template_revision})")
        self._create_cluster_async(
            org_name=validated_data[RequestKey.ORG_NAME],
            ovdc_name=validated_data[RequestKey.OVDC_NAME],
            cluster_name=cluster_name,
            cluster_id=cluster_id,
            template_name=template_name,
            template_revision=template_revision,
            num_workers=validated_data[RequestKey.NUM_WORKERS],
            network_name=validated_data[RequestKey.NETWORK_NAME],
            num_cpu=validated_data[RequestKey.NUM_CPU],
            mb_memory=validated_data[RequestKey.MB_MEMORY],
            storage_profile_name=validated_data[RequestKey.STORAGE_PROFILE_NAME],  # noqa: E501
            ssh_key_filepath=validated_data[RequestKey.SSH_KEY_FILEPATH],
            enable_nfs=validated_data[RequestKey.ENABLE_NFS],
            rollback=validated_data[RequestKey.ROLLBACK])

        return {
            'name': cluster_name,
            'cluster_id': cluster_id,
            'task_href': self.task_resource.get('href')
        }

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def resize_cluster(self, data):
        """Start the resize cluster operation.

        Common broker function that validates data for the 'resize cluster'
        operation. Native clusters cannot be resized down. Creating nodes is an
        asynchronous task, so the returned `result['task_href']` can be polled
        to get updates on task progress.

        Required data: cluster_name, network_name, num_nodes
        Optional data and default values: org_name=None, ovdc_name=None,
            rollback=True, template_name=None, template_revision=None

        :param dict data: request data.

        :return: the result of ``create_nodes`` for the missing workers.
        :rtype: dict

        :raises CseServerError: if the requested worker count is below 1 or
            is not greater than the current worker count.
        """
        # TODO default template for resizing should be master's template
        required = [
            RequestKey.CLUSTER_NAME,
            RequestKey.NUM_WORKERS,
            RequestKey.NETWORK_NAME
        ]
        utils.ensure_keys_in_dict(required, data, dict_name='data')
        defaults = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None,
            RequestKey.ROLLBACK: True,
            RequestKey.TEMPLATE_NAME: None,
            RequestKey.TEMPLATE_REVISION: None
        }
        validated_data = {**defaults, **data}
        cluster_name = validated_data[RequestKey.CLUSTER_NAME]
        num_workers_wanted = validated_data[RequestKey.NUM_WORKERS]
        if num_workers_wanted < 1:
            raise CseServerError(f"Worker node count must be > 0 "
                                 f"(received {num_workers_wanted}).")

        # cluster_handler.py already makes a cluster info API call to vCD, but
        # that call does not return any node info, so this additional
        # cluster info call must be made
        cluster_info = self.get_cluster_info(validated_data)
        num_workers = len(cluster_info['nodes'])
        if num_workers > num_workers_wanted:
            raise CseServerError(f"Automatic scale down is not supported for "
                                 f"vCD powered Kubernetes clusters. Use "
                                 f"'vcd cse delete node' command.")
        elif num_workers == num_workers_wanted:
            raise CseServerError(f"Cluster '{cluster_name}' already has "
                                 f"{num_workers} worker nodes.")

        # create_nodes() expects the number of nodes to add, not the target
        validated_data[RequestKey.NUM_WORKERS] = num_workers_wanted - num_workers  # noqa: E501
        return self.create_nodes(validated_data)

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def delete_cluster(self, data):
        """Start the delete cluster operation.

        Common broker function that validates data for 'delete cluster'
        operation. Deleting nodes is an asynchronous task, so the returned
        `result['task_href']` can be polled to get updates on task progress.

        Required data: cluster_name
        Optional data and default values: org_name=None, ovdc_name=None

        :param dict data: request data.

        :return: dict with keys 'cluster_name' and 'task_href'.
        :rtype: dict
        """
        required = [
            RequestKey.CLUSTER_NAME
        ]
        utils.ensure_keys_in_dict(required, data, dict_name='data')
        defaults = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None
        }
        validated_data = {**defaults, **data}
        cluster_name = validated_data[RequestKey.CLUSTER_NAME]

        cluster = get_cluster(self.tenant_client, cluster_name,
                              org_name=validated_data[RequestKey.ORG_NAME],
                              ovdc_name=validated_data[RequestKey.OVDC_NAME])
        cluster_id = cluster['cluster_id']
        # must _update_task here or else self.task_resource is None
        # do not logout of sys admin, or else in pyvcloud's session.request()
        # call, session becomes None
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Deleting cluster {cluster_name} ({cluster_id})")
        self._delete_cluster_async(cluster_name=cluster_name,
                                   cluster_vdc_href=cluster['vdc_href'])

        return {
            'cluster_name': cluster_name,
            'task_href': self.task_resource.get('href')
        }

    def get_node_info(self, data):
        """Get node metadata as dictionary.

        Required data: cluster_name, node_name
        Optional data and default values: org_name=None, ovdc_name=None

        :param dict data: request data.

        :return: dict with keys 'name', 'numberOfCpus', 'memoryMB', 'status'
            and 'ipAddress'; 'node_type' is added when the VM name starts
            with a known node type prefix, and 'exports' for NFS nodes.
        :rtype: dict

        :raises NodeNotFoundError: if the vApp has no VM named node_name.
        """
        required = [
            RequestKey.CLUSTER_NAME,
            RequestKey.NODE_NAME
        ]
        utils.ensure_keys_in_dict(required, data, dict_name='data')
        defaults = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None
        }
        validated_data = {**defaults, **data}
        cluster_name = validated_data[RequestKey.CLUSTER_NAME]
        node_name = validated_data[RequestKey.NODE_NAME]

        cluster = get_cluster(self.tenant_client, cluster_name,
                              org_name=validated_data[RequestKey.ORG_NAME],
                              ovdc_name=validated_data[RequestKey.OVDC_NAME])
        vapp = VApp(self.tenant_client, href=cluster['vapp_href'])
        vms = vapp.get_all_vms()
        node_info = None
        for vm in vms:
            vm_name = vm.get('name')
            if node_name != vm_name:
                continue

            node_info = {
                'name': vm_name,
                'numberOfCpus': '',
                'memoryMB': '',
                'status': VCLOUD_STATUS_MAP.get(int(vm.get('status'))),
                'ipAddress': ''
            }
            # CPU and memory stay '' when the VM has no VmSpecSection
            if hasattr(vm, 'VmSpecSection'):
                node_info['numberOfCpus'] = vm.VmSpecSection.NumCpus.text
                node_info['memoryMB'] = vm.VmSpecSection.MemoryResourceMb.Configured.text  # noqa: E501
            try:
                node_info['ipAddress'] = vapp.get_primary_ip(vm_name)
            except Exception:
                LOGGER.debug(f"Unable to get ip address of node {vm_name}")
            if vm_name.startswith(NodeType.MASTER):
                node_info['node_type'] = 'master'
            elif vm_name.startswith(NodeType.WORKER):
                node_info['node_type'] = 'worker'
            elif vm_name.startswith(NodeType.NFS):
                node_info['node_type'] = 'nfs'
                node_info['exports'] = self._get_nfs_exports(node_info['ipAddress'], vapp, vm_name)  # noqa: E501
        if node_info is None:
            raise NodeNotFoundError(f"Node '{node_name}' not found in "
                                    f"cluster '{cluster_name}'")
        return node_info

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def create_nodes(self, data):
        """Start the create nodes operation.

        Validates data for 'node create' operation. Creating nodes is an
        asynchronous task, so the returned `result['task_href']` can be polled
        to get updates on task progress.

        Required data: cluster_name, network_name
        Optional data and default values: org_name=None, ovdc_name=None,
            num_nodes=1, num_cpu=None, mb_memory=None,
            storage_profile_name=None, ssh_key_filepath=None,
            template_name=default, template_revision=default, enable_nfs=False,
            rollback=True

        :param dict data: request data.

        :return: dict with keys 'cluster_name' and 'task_href'.
        :rtype: dict

        :raises CseServerError: if the worker node count is below 1.
        """
        required = [
            RequestKey.CLUSTER_NAME,
            RequestKey.NETWORK_NAME
        ]
        utils.ensure_keys_in_dict(required, data, dict_name='data')
        cluster_name = data[RequestKey.CLUSTER_NAME]
        # check that requested/default template is valid
        template = get_template(
            name=data.get(RequestKey.TEMPLATE_NAME),
            revision=data.get(RequestKey.TEMPLATE_REVISION))
        defaults = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None,
            RequestKey.NUM_WORKERS: 1,
            RequestKey.NUM_CPU: None,
            RequestKey.MB_MEMORY: None,
            RequestKey.STORAGE_PROFILE_NAME: None,
            RequestKey.SSH_KEY_FILEPATH: None,
            RequestKey.TEMPLATE_NAME: template[LocalTemplateKey.NAME],
            RequestKey.TEMPLATE_REVISION: template[LocalTemplateKey.REVISION],
            RequestKey.ENABLE_NFS: False,
            RequestKey.ROLLBACK: True,
        }
        validated_data = {**defaults, **data}

        # TODO HACK default dictionary combining needs to be fixed
        # (a key present in `data` with a falsy value overrides the default
        # in the merge above, hence the explicit `or` fallback)
        validated_data[RequestKey.TEMPLATE_NAME] = validated_data[RequestKey.TEMPLATE_NAME] or template[LocalTemplateKey.NAME]  # noqa: E501
        validated_data[RequestKey.TEMPLATE_REVISION] = validated_data[RequestKey.TEMPLATE_REVISION] or template[LocalTemplateKey.REVISION]  # noqa: E501
        template_name = validated_data[RequestKey.TEMPLATE_NAME]
        template_revision = validated_data[RequestKey.TEMPLATE_REVISION]

        num_workers = validated_data[RequestKey.NUM_WORKERS]
        if num_workers < 1:
            raise CseServerError(f"Worker node count must be > 0 "
                                 f"(received {num_workers}).")

        cluster = get_cluster(self.tenant_client, cluster_name,
                              org_name=validated_data[RequestKey.ORG_NAME],
                              ovdc_name=validated_data[RequestKey.OVDC_NAME])
        cluster_id = cluster['cluster_id']
        # must _update_task here or else self.task_resource is None
        # do not logout of sys admin, or else in pyvcloud's session.request()
        # call, session becomes None
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Creating {num_workers} node(s) from template "
                    f"'{template_name}' (revision {template_revision}) and "
                    f"adding to {cluster_name} ({cluster_id})")
        self._create_nodes_async(
            cluster_name=cluster_name,
            cluster_vdc_href=cluster['vdc_href'],
            cluster_vapp_href=cluster['vapp_href'],
            cluster_id=cluster_id,
            template_name=template_name,
            template_revision=template_revision,
            num_workers=validated_data[RequestKey.NUM_WORKERS],
            network_name=validated_data[RequestKey.NETWORK_NAME],
            num_cpu=validated_data[RequestKey.NUM_CPU],
            mb_memory=validated_data[RequestKey.MB_MEMORY],
            storage_profile_name=validated_data[RequestKey.STORAGE_PROFILE_NAME],  # noqa: E501
            ssh_key_filepath=validated_data[RequestKey.SSH_KEY_FILEPATH],
            enable_nfs=validated_data[RequestKey.ENABLE_NFS],
            rollback=validated_data[RequestKey.ROLLBACK])

        return {
            'cluster_name': cluster_name,
            'task_href': self.task_resource.get('href')
        }

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def delete_nodes(self, data):
        """Start the delete nodes operation.

        Validates data for the 'delete nodes' operation. Deleting nodes is an
        asynchronous task, so the returned `result['task_href']` can be polled
        to get updates on task progress.

        Required data: cluster_name, node_names_list
        Optional data and default values: org_name=None, ovdc_name=None

        :param dict data: request data.

        :return: dict with keys 'cluster_name' and 'task_href', or
            ``{'body': {}}`` (and no task) when node_names_list is empty.
        :rtype: dict

        :raises CseServerError: if a name in node_names_list starts with the
            master node prefix.
        """
        required = [
            RequestKey.CLUSTER_NAME,
            RequestKey.NODE_NAMES_LIST
        ]
        utils.ensure_keys_in_dict(required, data, dict_name='data')
        defaults = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None
        }
        validated_data = {**defaults, **data}
        cluster_name = validated_data[RequestKey.CLUSTER_NAME]
        node_names_list = validated_data[RequestKey.NODE_NAMES_LIST]
        # check that there are nodes to delete
        if len(node_names_list) == 0:
            LOGGER.debug("No nodes specified to delete")
            return {'body': {}}
        # check that master node is not in specified nodes
        for node in node_names_list:
            if node.startswith(NodeType.MASTER):
                raise CseServerError(f"Can't delete a master node: '{node}'.")

        cluster = get_cluster(self.tenant_client, cluster_name,
                              org_name=validated_data[RequestKey.ORG_NAME],
                              ovdc_name=validated_data[RequestKey.OVDC_NAME])
        cluster_id = cluster['cluster_id']
        # must _update_task here or else self.task_resource is None
        # do not logout of sys admin, or else in pyvcloud's session.request()
        # call, session becomes None
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Deleting {len(node_names_list)} node(s)"
                    f" from cluster {cluster_name}({cluster_id})")
        self._delete_nodes_async(
            cluster_name=cluster_name,
            cluster_vapp_href=cluster['vapp_href'],
            node_names_list=validated_data[RequestKey.NODE_NAMES_LIST])

        return {
            'cluster_name': cluster_name,
            'task_href': self.task_resource.get('href')
        }

    # all parameters following '*args' are required and keyword-only
    @run_async
    def _create_cluster_async(self, *args,
                              org_name, ovdc_name, cluster_name, cluster_id,
                              template_name, template_revision, num_workers,
                              network_name, num_cpu, mb_memory,
                              storage_profile_name, ssh_key_filepath,
                              enable_nfs, rollback):
        """Create the cluster vApp, its nodes, and report progress on the task.

        Steps: create the vApp, tag it with cluster metadata, add and
        initialize the master node, store the master IP in vApp metadata, add
        the worker nodes and join them to the cluster, optionally add one NFS
        node, then mark the task SUCCESS.

        On one of the known cluster errors the cluster is deleted first when
        ``rollback`` is truthy, and the task is marked ERROR. Any other
        exception marks the task ERROR without rollback. Exceptions are not
        re-raised. The sys admin client is always logged out at the end.

        The ``run_async`` decorator presumably runs this method in the
        background -- confirm against its definition.
        """
        # NOTE(review): these two lookups run before the `try`, so an
        # exception raised here bypasses both the ERROR task update and the
        # sys admin logout in `finally`.
        org = vcd_utils.get_org(self.tenant_client, org_name=org_name)
        vdc = vcd_utils.get_vdc(
            self.tenant_client, vdc_name=ovdc_name, org=org)

        LOGGER.debug(f"About to create cluster {cluster_name} on {ovdc_name}"
                     f" with {num_workers} worker nodes, "
                     f"storage profile={storage_profile_name}")
        try:
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating cluster vApp {cluster_name}({cluster_id})")
            try:
                vapp_resource = \
                    vdc.create_vapp(cluster_name,
                                    description=f"cluster {cluster_name}",
                                    network=network_name,
                                    fence_mode='bridged')
            except Exception as e:
                msg = f"Error while creating vApp: {e}"
                LOGGER.debug(str(e))
                raise ClusterOperationError(msg)
            self.tenant_client.get_task_monitor().wait_for_status(vapp_resource.Tasks.Task[0])  # noqa: E501

            template = get_template(template_name, template_revision)

            tags = {
                ClusterMetadataKey.CLUSTER_ID: cluster_id,
                ClusterMetadataKey.CSE_VERSION: pkg_resources.require('container-service-extension')[0].version,  # noqa: E501
                ClusterMetadataKey.TEMPLATE_NAME: template[LocalTemplateKey.NAME],  # noqa: E501
                ClusterMetadataKey.TEMPLATE_REVISION: template[LocalTemplateKey.REVISION]  # noqa: E501
            }
            vapp = VApp(self.tenant_client, href=vapp_resource.get('href'))
            task = vapp.set_multiple_metadata(tags)
            self.tenant_client.get_task_monitor().wait_for_status(task)

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating master node for "
                        f"{cluster_name} ({cluster_id})")
            vapp.reload()
            server_config = utils.get_server_runtime_config()
            catalog_name = server_config['broker']['catalog']
            # any failure is converted to a stage-specific error type so the
            # rollback handler below catches it
            try:
                add_nodes(client=self.tenant_client,
                          num_nodes=1,
                          node_type=NodeType.MASTER,
                          org=org,
                          vdc=vdc,
                          vapp=vapp,
                          catalog_name=catalog_name,
                          template=template,
                          network_name=network_name,
                          num_cpu=num_cpu,
                          memory_in_mb=mb_memory,
                          storage_profile=storage_profile_name,
                          ssh_key_filepath=ssh_key_filepath)
            except Exception as e:
                raise MasterNodeCreationError("Error adding master node:",
                                              str(e))

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Initializing cluster {cluster_name} ({cluster_id})")
            vapp.reload()
            init_cluster(vapp, template[LocalTemplateKey.NAME],
                         template[LocalTemplateKey.REVISION])
            master_ip = get_master_ip(vapp)
            task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                     master_ip)
            self.tenant_client.get_task_monitor().wait_for_status(task)

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating {num_workers} node(s) for "
                        f"{cluster_name}({cluster_id})")
            try:
                add_nodes(client=self.tenant_client,
                          num_nodes=num_workers,
                          node_type=NodeType.WORKER,
                          org=org,
                          vdc=vdc,
                          vapp=vapp,
                          catalog_name=catalog_name,
                          template=template,
                          network_name=network_name,
                          num_cpu=num_cpu,
                          memory_in_mb=mb_memory,
                          storage_profile=storage_profile_name,
                          ssh_key_filepath=ssh_key_filepath)
            except Exception as e:
                raise WorkerNodeCreationError("Error creating worker node:",
                                              str(e))

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Adding {num_workers} node(s) to "
                        f"{cluster_name}({cluster_id})")
            vapp.reload()
            join_cluster(vapp, template[LocalTemplateKey.NAME],
                         template[LocalTemplateKey.REVISION])

            if enable_nfs:
                self._update_task(
                    TaskStatus.RUNNING,
                    message=f"Creating NFS node for "
                            f"{cluster_name} ({cluster_id})")
                try:
                    add_nodes(client=self.tenant_client,
                              num_nodes=1,
                              node_type=NodeType.NFS,
                              org=org,
                              vdc=vdc,
                              vapp=vapp,
                              catalog_name=catalog_name,
                              template=template,
                              network_name=network_name,
                              num_cpu=num_cpu,
                              memory_in_mb=mb_memory,
                              storage_profile=storage_profile_name,
                              ssh_key_filepath=ssh_key_filepath)
                except Exception as e:
                    raise NFSNodeCreationError("Error creating NFS node:",
                                               str(e))

            self._update_task(
                TaskStatus.SUCCESS,
                message=f"Created cluster {cluster_name} ({cluster_id})")
        except (MasterNodeCreationError, WorkerNodeCreationError,
                NFSNodeCreationError, ClusterJoiningError,
                ClusterInitializationError, ClusterOperationError) as e:
            if rollback:
                msg = f"Error creating cluster {cluster_name}. " \
                      f"Deleting cluster (rollback=True)"
                self._update_task(TaskStatus.RUNNING, message=msg)
                LOGGER.info(msg)
                # rollback is best effort: a failure is logged and the
                # original error is still reported on the task
                try:
                    cluster = get_cluster(self.tenant_client,
                                          cluster_name,
                                          cluster_id=cluster_id,
                                          org_name=org_name,
                                          ovdc_name=ovdc_name)
                    self._delete_cluster(cluster_name=cluster_name,
                                         cluster_vdc_href=cluster['vdc_href'])
                except Exception:
                    LOGGER.error(f"Failed to delete cluster {cluster_name}",
                                 exc_info=True)
            LOGGER.error(f"Error creating cluster {cluster_name}",
                         exc_info=True)
            error_obj = error_to_json(e)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])  # noqa: E501
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
                stack_trace=stack_trace)
            # raising an exception here prints a stacktrace to server console
        except Exception as e:
            LOGGER.error(f"Unknown error creating cluster {cluster_name}",
                         exc_info=True)
            error_obj = error_to_json(e)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])  # noqa: E501
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
                stack_trace=stack_trace)
        finally:
            self.logout_sys_admin_client()

    @run_async
    def _create_nodes_async(self, *args,
                            cluster_name, cluster_vdc_href, cluster_vapp_href,
                            cluster_id, template_name, template_revision,
                            num_workers, network_name, num_cpu, mb_memory,
                            storage_profile_name, ssh_key_filepath, enable_nfs,
                            rollback):
        """Add nodes to an existing cluster and report progress on the task.

        The node type is NFS when ``enable_nfs`` is truthy and WORKER
        otherwise. NFS nodes are only created; worker nodes are additionally
        joined to the cluster. The task is marked SUCCESS at the end of either
        path.

        On ``NodeCreationError`` the nodes named in the error are deleted
        first when ``rollback`` is truthy, and the task is marked ERROR. Any
        other exception marks the task ERROR without rollback. Exceptions are
        not re-raised. The sys admin client is always logged out at the end.

        All parameters following ``*args`` are required and keyword-only.
        """
        # NOTE(review): these lookups run before the `try`, so an exception
        # raised here bypasses both the ERROR task update and the sys admin
        # logout in `finally`.
        org = vcd_utils.get_org(self.tenant_client)
        vdc = VDC(self.tenant_client, href=cluster_vdc_href)
        vapp = VApp(self.tenant_client, href=cluster_vapp_href)
        template = get_template(name=template_name, revision=template_revision)
        msg = f"Creating {num_workers} node(s) from template " \
              f"'{template_name}' (revision {template_revision}) and " \
              f"adding to {cluster_name} ({cluster_id})"
        LOGGER.debug(msg)
        try:
            self._update_task(TaskStatus.RUNNING, message=msg)

            node_type = NodeType.WORKER
            if enable_nfs:
                node_type = NodeType.NFS

            server_config = utils.get_server_runtime_config()
            catalog_name = server_config['broker']['catalog']

            new_nodes = add_nodes(client=self.tenant_client,
                                  num_nodes=num_workers,
                                  node_type=node_type,
                                  org=org,
                                  vdc=vdc,
                                  vapp=vapp,
                                  catalog_name=catalog_name,
                                  template=template,
                                  network_name=network_name,
                                  num_cpu=num_cpu,
                                  memory_in_mb=mb_memory,
                                  storage_profile=storage_profile_name,
                                  ssh_key_filepath=ssh_key_filepath)

            if node_type == NodeType.NFS:
                self._update_task(
                    TaskStatus.SUCCESS,
                    message=f"Created {num_workers} node(s) for "
                            f"{cluster_name}({cluster_id})")
            elif node_type == NodeType.WORKER:
                self._update_task(
                    TaskStatus.RUNNING,
                    message=f"Adding {num_workers} node(s) to cluster "
                            f"{cluster_name}({cluster_id})")
                # only the freshly created VMs are joined to the cluster
                target_nodes = []
                for spec in new_nodes['specs']:
                    target_nodes.append(spec['target_vm_name'])
                vapp.reload()
                join_cluster(vapp, template[LocalTemplateKey.NAME],
                             template[LocalTemplateKey.REVISION], target_nodes)
                self._update_task(
                    TaskStatus.SUCCESS,
                    message=f"Added {num_workers} node(s) to cluster "
                            f"{cluster_name}({cluster_id})")
        except NodeCreationError as e:
            if rollback:
                msg = f"Error adding nodes to {cluster_name} {cluster_id}." \
                      f" Deleting nodes: {e.node_names} (rollback=True)"
                self._update_task(TaskStatus.RUNNING, message=msg)
                LOGGER.info(msg)
                # rollback is best effort: a failure is logged and the
                # original error is still reported on the task
                try:
                    self._delete_nodes(cluster_name=cluster_name,
                                       cluster_vapp_href=cluster_vapp_href,
                                       node_names_list=e.node_names)
                except Exception:
                    LOGGER.error(f"Failed to delete nodes {e.node_names} "
                                 f"from cluster {cluster_name}",
                                 exc_info=True)
            # NOTE(review): the error is logged twice with exc_info in this
            # handler (here and two statements below).
            LOGGER.error(f"Error adding nodes to {cluster_name}",
                         exc_info=True)
            error_obj = error_to_json(e)
            LOGGER.error(str(e), exc_info=True)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])  # noqa: E501
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
                stack_trace=stack_trace)
            # raising an exception here prints a stacktrace to server console
        except Exception as e:
            error_obj = error_to_json(e)
            LOGGER.error(str(e), exc_info=True)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])  # noqa: E501
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
                stack_trace=stack_trace)
        finally:
            self.logout_sys_admin_client()

    # all parameters following '*args' are required and keyword-only
    @run_async
    def _delete_nodes_async(self, *args,
                            cluster_name, cluster_vapp_href, node_names_list):
        """Delete the given nodes and report the outcome on the task.

        Wraps ``_delete_nodes`` with RUNNING/SUCCESS task updates. Any
        exception is logged and marks the task ERROR; it is not re-raised.
        The sys admin client is always logged out at the end.
        """
        try:
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Deleting {len(node_names_list)} node(s) "
                        f"from cluster {cluster_name}")
            self._delete_nodes(cluster_name=cluster_name,
                               cluster_vapp_href=cluster_vapp_href,
                               node_names_list=node_names_list)
            # NOTE(review): the message says "to cluster" although nodes were
            # removed; "from cluster" looks intended.
            self._update_task(
                TaskStatus.SUCCESS,
                message=f"Deleted {len(node_names_list)} node(s)"
                        f" to cluster {cluster_name}")
        except Exception as e:
            LOGGER.error(f"Unexpected error while deleting nodes "
                         f"{node_names_list}: {e}",
                         exc_info=True)
            error_obj = error_to_json(e)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])  # noqa: E501
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
                stack_trace=stack_trace)
        finally:
            self.logout_sys_admin_client()

    # all parameters following '*args' are required and keyword-only
    @run_async
    def _delete_cluster_async(self, *args, cluster_name, cluster_vdc_href):
        """Delete the cluster and report the outcome on the task.

        Wraps ``_delete_cluster`` with RUNNING/SUCCESS task updates. Any
        exception is logged and marks the task ERROR; it is not re-raised.
        The sys admin client is always logged out at the end.
        """
        try:
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Deleting cluster {cluster_name}")
            self._delete_cluster(cluster_name=cluster_name,
                                 cluster_vdc_href=cluster_vdc_href)
            self._update_task(
                TaskStatus.SUCCESS,
                message=f"Deleted cluster {cluster_name}")
        except Exception as e:
            LOGGER.error(f"Unexpected error while deleting cluster: {e}",
                         exc_info=True)
            self._update_task(TaskStatus.ERROR, error_message=str(e))
        finally:
            self.logout_sys_admin_client()

    # all parameters following '*args' are required and keyword-only
    # synchronous cluster/node delete functions are required for rollback
    def _delete_cluster(self, *args, cluster_name, cluster_vdc_href):
        """Force-delete the vApp named cluster_name and wait for the task.

        Performs no task updates and no error handling; exceptions propagate
        to the caller.
        """
        LOGGER.debug(f"About to delete cluster with name: {cluster_name}")
        vdc = VDC(self.tenant_client, href=cluster_vdc_href)
        task = vdc.delete_vapp(cluster_name, force=True)
        self.tenant_client.get_task_monitor().wait_for_status(task)

    # all parameters following '*args' are required and keyword-only
    def _delete_nodes(self, *args,
                      cluster_name, cluster_vapp_href, node_names_list):
        """Remove the nodes from the cluster and delete their VMs.

        Failures of ``delete_nodes_from_cluster`` and of individual VM
        undeploys are logged and ignored; failures of the VM lookup or of the
        final VM deletion propagate to the caller.
        """
        LOGGER.debug(f"About to delete nodes {node_names_list} "
                     f"from cluster {cluster_name}")

        vapp = VApp(self.tenant_client, href=cluster_vapp_href)
        # best effort: the VMs are undeployed and deleted even if this fails
        try:
            delete_nodes_from_cluster(vapp, node_names_list)
        except Exception:
            LOGGER.error(f"Couldn't delete node {node_names_list} "
                         f"from cluster:{cluster_name}")

        for vm_name in node_names_list:
            vm = VM(self.tenant_client, resource=vapp.get_vm(vm_name))
            try:
                task = vm.undeploy()
                self.tenant_client.get_task_monitor().wait_for_status(task)
            except Exception:
                LOGGER.warning(f"Couldn't undeploy VM {vm_name}")

        task = vapp.delete_vms(node_names_list)
        self.tenant_client.get_task_monitor().wait_for_status(task)

    def _update_task(self, status, message='', error_message=None,
                     stack_trace=''):
        """Update task or create it if it does not exist.

        This function should only be used in the x_async functions, or in the
        6 common broker functions to create the required task.
        When this function is used, it logs in the sys admin client if it is
        not already logged in, but it does not log out. This is because many
        _update_task() calls are used in sequence until the task succeeds or
        fails. Once the task is updated to a success or failure state, then
        the sys admin client should be logged out.

        Another reason for decoupling sys admin logout and this function is
        because if any unknown errors occur during an operation, there should
        be a finally clause that takes care of logging out.

        :param status: task status; its ``.value`` is sent to vCD.
        :param str message: text stored as the task's operation.
        :param str error_message: error text stored on the task.
        :param str stack_trace: stack trace stored on the task; replaced by
            '' when the tenant client is not a sys admin.
        """
        # stack traces are only recorded for sys admin tenants
        if not self.tenant_client.is_sysadmin():
            stack_trace = ''

        if self.task is None:
            self.task = Task(self.sys_admin_client)

        # task_href stays None until the first update has stored a task
        # resource
        task_href = None
        if self.task_resource is not None:
            task_href = self.task_resource.get('href')

        org = vcd_utils.get_org(self.tenant_client)
        user_href = org.get_user(self.client_session.get('user')).get('href')

        self.task_resource = self.task.update(
            status=status.value,
            namespace='vcloud.cse',
            operation=message,
            operation_name='cluster operation',
            details='',
            progress=None,
            owner_href=self.tenant_org_href,
            owner_name=self.tenant_org_name,
            owner_type='application/vnd.vmware.vcloud.org+xml',
            user_href=user_href,
            user_name=self.tenant_user_name,
            org_href=self.tenant_org_href,
            task_href=task_href,
            error_message=error_message,
            stack_trace=stack_trace
        )

    def _get_nfs_exports(self, ip, vapp, vm_name):
        """Get the exports from remote NFS server (helper method).
:param ip: (str): IP address of the NFS server :param vapp: (pyvcloud.vcd.vapp.VApp): The vApp or cluster to which node belongs :param vm_name: name of node's VM :return: (List): List of exports """ script = f"#!/usr/bin/env bash\nshowmount -e {ip}" result = execute_script_in_nodes(vapp=vapp, node_names=[vm_name], script=script, check_tools=False) lines = result[0][1].content.decode().split('\n') exports = [] for index in range(1, len(lines) - 1): export = lines[index].strip().split()[0] exports.append(export) return exports
class DefaultBroker(AbstractBroker, threading.Thread):
    """Broker that serves cluster requests against vCD.

    Read-only requests are answered synchronously. Cluster and node
    creation/deletion create a vCD task, then continue on this object's own
    thread: the request method sets ``self.op`` and calls ``start()``, and
    ``run()`` dispatches to the matching ``*_thread`` method.
    """

    def __init__(self, headers, request_body):
        """Store the request and initialise the connection/task state.

        :param headers: request headers, used to connect as the tenant.
        :param dict request_body: request payload read by the operations.
        """
        threading.Thread.__init__(self)
        self.headers = headers
        self.body = request_body
        self.tenant_client = None
        self.client_session = None
        self.tenant_info = None
        self.sys_admin_client = None
        self.task = None
        self.task_resource = None
        self.op = None
        self.cluster_name = None
        self.cluster_id = None
        self.daemon = False

    def get_tenant_client_session(self):
        """Return the tenant session, connecting first if necessary."""
        if self.client_session is None:
            self._connect_tenant()
        return self.client_session

    def _connect_tenant(self):
        """Connect as the tenant and cache the user and org details."""
        server_config = get_server_runtime_config()
        host = server_config['vcd']['host']
        verify = server_config['vcd']['verify']
        self.tenant_client, self.client_session = connect_vcd_user_via_token(
            vcd_uri=host,
            headers=self.headers,
            verify_ssl_certs=verify)
        self.tenant_info = {
            'user_name': self.client_session.get('user'),
            'user_id': self.client_session.get('userId'),
            'org_name': self.client_session.get('org'),
            'org_href': self.tenant_client._get_wk_endpoint(
                _WellKnownEndpoint.LOGGED_IN_ORG)
        }

    def _connect_sys_admin(self):
        """Obtain a sys admin client."""
        self.sys_admin_client = get_vcd_sys_admin_client()

    def _disconnect_sys_admin(self):
        """Log the sys admin client out, if there is one, and drop it."""
        if self.sys_admin_client is not None:
            self.sys_admin_client.logout()
            self.sys_admin_client = None

    def _to_message(self, e):
        """Wrap an exception's message (or its ``str``) in a dict."""
        if hasattr(e, 'message'):
            return {'message': e.message}
        else:
            return {'message': str(e)}

    def update_task(self, status, message=None, error_message=None,
                    stack_trace=''):
        """Create the vCD task on first use, update it afterwards.

        :param status: task status; its ``.value`` is sent to vCD.
        :param str message: operation text; defaults to
            ``OP_MESSAGE[self.op]``.
        :param str error_message: error text for a failed task.
        :param str stack_trace: stack trace for a failed task; blanked when
            the tenant client is not a sysadmin.
        """
        if not self.tenant_client.is_sysadmin():
            stack_trace = ''
        if self.task is None:
            self.task = Task(self.sys_admin_client)
        if message is None:
            message = OP_MESSAGE[self.op]
        if self.task_resource is not None:
            task_href = self.task_resource.get('href')
        else:
            task_href = None
        self.task_resource = self.task.update(
            status=status.value,
            namespace='vcloud.cse',
            operation=message,
            operation_name=self.op,
            details='',
            progress=None,
            owner_href='urn:cse:cluster:%s' % self.cluster_id,
            owner_name=self.cluster_name,
            owner_type='application/vcloud.cse.cluster+xml',
            user_href=self.tenant_info['user_id'],
            user_name=self.tenant_info['user_name'],
            org_href=self.tenant_info['org_href'],
            task_href=task_href,
            error_message=error_message,
            stack_trace=stack_trace)

    def _report_task_error(self, err):
        """Log the active traceback and move the task to ERROR.

        Must be called from inside an ``except`` block so that
        ``traceback.format_exc()`` sees the exception being handled.
        """
        LOGGER.error(traceback.format_exc())
        error_obj = error_to_json(err)
        stack_trace = ''.join(error_obj[ERROR_MESSAGE][ERROR_STACKTRACE])
        self.update_task(
            TaskStatus.ERROR,
            error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION],
            stack_trace=stack_trace)

    def is_valid_name(self, name):
        """Validate that the cluster name against the pattern."""
        # An empty name used to raise IndexError on name[-1] below.
        if not name or len(name) > MAX_HOST_NAME_LENGTH:
            return False
        if name[-1] == '.':
            name = name[:-1]
        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        allowed = re.compile(r"(?!-)[A-Z\d-]{1,63}(?<!-)$", re.IGNORECASE)
        return all(allowed.match(x) for x in name.split("."))

    def get_template(self, name=None):
        """Return the template config entry with the given name.

        :param str name: template name; when omitted, the request body's
            ``template`` value is used, else the configured default.

        :raises Exception: if no configured template has that name.
        """
        server_config = get_server_runtime_config()
        if name is None:
            if 'template' in self.body and self.body['template'] is not None:
                name = self.body['template']
            else:
                name = server_config['broker']['default_template']
        for template in server_config['broker']['templates']:
            if template['name'] == name:
                return template
        raise Exception('Template %s not found' % name)

    def run(self):
        """Thread entry point: run the worker that matches ``self.op``."""
        LOGGER.debug('thread started op=%s' % self.op)
        if self.op == OP_CREATE_CLUSTER:
            self.create_cluster_thread()
        elif self.op == OP_DELETE_CLUSTER:
            self.delete_cluster_thread()
        elif self.op == OP_CREATE_NODES:
            self.create_nodes_thread()
        elif self.op == OP_DELETE_NODES:
            self.delete_nodes_thread()

    @exception_handler
    def list_clusters(self):
        """Return the clusters loaded from metadata for the tenant."""
        result = {}
        result['body'] = []
        result['status_code'] = OK
        self._connect_tenant()
        clusters = load_from_metadata(self.tenant_client)
        result['body'] = clusters
        return result

    @exception_handler
    def get_cluster_info(self, name):
        """Get the info of the cluster.

        :param name: (str): Name of the cluster

        :return: (dict): Info of the cluster.
        """
        result = {}
        result['body'] = []
        result['status_code'] = OK

        self._connect_tenant()
        clusters = load_from_metadata(self.tenant_client, name=name)
        if len(clusters) == 0:
            raise CseServerError('Cluster \'%s\' not found.' % name)
        vapp = VApp(self.tenant_client, href=clusters[0]['vapp_href'])
        vms = vapp.get_all_vms()
        for vm in vms:
            node_info = {'name': vm.get('name'), 'ipAddress': ''}
            try:
                node_info['ipAddress'] = vapp.get_primary_ip(vm.get('name'))
            except Exception:
                LOGGER.debug('cannot get ip address for node %s' %
                             vm.get('name'))
            # The VM name prefix decides which list the node is filed under.
            if vm.get('name').startswith(TYPE_MASTER):
                clusters[0].get('master_nodes').append(node_info)
            elif vm.get('name').startswith(TYPE_NODE):
                clusters[0].get('nodes').append(node_info)
            elif vm.get('name').startswith(TYPE_NFS):
                clusters[0].get('nfs_nodes').append(node_info)
        result['body'] = clusters[0]
        return result

    @exception_handler
    def get_node_info(self, cluster_name, node_name):
        """Get the info of a given node in the cluster.

        :param cluster_name: (str): Name of the cluster
        :param node_name: (str): Name of the node

        :return: (dict): Info of the node.
        """
        result = {}
        result['body'] = []
        result['status_code'] = OK

        self._connect_tenant()
        clusters = load_from_metadata(self.tenant_client, name=cluster_name)
        if len(clusters) == 0:
            raise CseServerError(f"Cluster \'{cluster_name}\' not found.")
        vapp = VApp(self.tenant_client, href=clusters[0]['vapp_href'])
        vms = vapp.get_all_vms()
        node_info = None
        for vm in vms:
            if node_name == vm.get('name'):
                node_info = {
                    'name': vm.get('name'),
                    'numberOfCpus': '',
                    'memoryMB': '',
                    'status': VCLOUD_STATUS_MAP.get(int(vm.get('status'))),
                    'ipAddress': ''
                }
                if hasattr(vm, 'VmSpecSection'):
                    node_info['numberOfCpus'] = vm.VmSpecSection.NumCpus.text
                    node_info['memoryMB'] = \
                        vm.VmSpecSection.MemoryResourceMb.Configured.text
                try:
                    node_info['ipAddress'] = vapp.get_primary_ip(
                        vm.get('name'))
                except Exception:
                    LOGGER.debug('cannot get ip address '
                                 'for node %s' % vm.get('name'))
                if vm.get('name').startswith(TYPE_MASTER):
                    node_info['node_type'] = 'master'
                elif vm.get('name').startswith(TYPE_NODE):
                    node_info['node_type'] = 'node'
                elif vm.get('name').startswith(TYPE_NFS):
                    node_info['node_type'] = 'nfsd'
                    exports = self._get_nfs_exports(node_info['ipAddress'],
                                                    vapp,
                                                    vm)
                    node_info['exports'] = exports
        if node_info is None:
            raise CseServerError('Node \'%s\' not found in cluster \'%s\'' %
                                 (node_name, cluster_name))
        result['body'] = node_info
        return result

    def _get_nfs_exports(self, ip, vapp, node):
        """Get the exports from remote NFS server (helper method).

        :param ip: (str): IP address of the NFS server
        :param vapp: (pyvcloud.vcd.vapp.VApp): The vApp or cluster
         to which node belongs
        :param node: (`lxml.objectify.StringElement`) object
         representing the vm resource.

        :return: (List): List of exports
        """
        # TODO(right template) find a right way to retrieve
        # the template from which nfs node was created.
        server_config = get_server_runtime_config()
        template = server_config['broker']['templates'][0]
        script = '#!/usr/bin/env bash\nshowmount -e %s' % ip
        result = execute_script_in_nodes(server_config, vapp,
                                         template['admin_password'],
                                         script, nodes=[node],
                                         check_tools=False)
        lines = result[0][1].content.decode().split('\n')
        # The first output line is skipped, as before (showmount prints an
        # "Export list for ..." header there). Blank lines are ignored, so a
        # stray empty line no longer raises IndexError and the last export
        # is kept even when the output lacks a trailing newline.
        return [line.split()[0] for line in lines[1:] if line.strip()]

    @exception_handler
    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def create_cluster(self):
        """Validate the request, create the task and start cluster creation.

        The actual work happens in ``create_cluster_thread``.

        :return: (dict): name, id and task href, with status ACCEPTED.

        :raises CseServerError: if the cluster name is invalid.
        """
        result = {}
        result['body'] = {}

        cluster_name = self.body['name']
        vdc_name = self.body['vdc']
        node_count = self.body['node_count']
        LOGGER.debug('About to create cluster %s on %s with %s nodes, sp=%s',
                     cluster_name, vdc_name, node_count,
                     self.body['storage_profile'])
        result['body'] = {
            'message': 'can\'t create cluster \'%s\'' % cluster_name
        }

        if not self.is_valid_name(cluster_name):
            raise CseServerError(f"Invalid cluster name \'{cluster_name}\'")
        self._connect_tenant()
        self._connect_sys_admin()
        self.cluster_name = cluster_name
        self.cluster_id = str(uuid.uuid4())
        self.op = OP_CREATE_CLUSTER
        self.update_task(TaskStatus.RUNNING,
                         message='Creating cluster %s(%s)' %
                         (cluster_name, self.cluster_id))
        self.daemon = True
        self.start()
        response_body = {}
        response_body['name'] = self.cluster_name
        response_body['cluster_id'] = self.cluster_id
        response_body['task_href'] = self.task_resource.get('href')
        result['body'] = response_body
        result['status_code'] = ACCEPTED
        return result

    @rollback
    def create_cluster_thread(self):
        """Create the cluster vApp and its master, worker and NFS nodes.

        Node-creation and cluster errors are reported on the task and
        re-raised (to the ``rollback`` decorator); any other error is
        only reported on the task.
        """
        network_name = self.body['network']
        try:
            clusters = load_from_metadata(self.tenant_client,
                                          name=self.cluster_name)
            if len(clusters) != 0:
                raise ClusterAlreadyExistsError(f"Cluster {self.cluster_name} "
                                                "already exists.")
            org_resource = self.tenant_client.get_org()
            org = Org(self.tenant_client, resource=org_resource)
            vdc_resource = org.get_vdc(self.body['vdc'])
            vdc = VDC(self.tenant_client, resource=vdc_resource)
            template = self.get_template()
            self.update_task(TaskStatus.RUNNING,
                             message='Creating cluster vApp %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            try:
                vapp_resource = vdc.create_vapp(
                    self.cluster_name,
                    description='cluster %s' % self.cluster_name,
                    network=network_name,
                    fence_mode='bridged')
            except Exception as e:
                raise ClusterOperationError('Error while creating vApp:',
                                            str(e))

            self.tenant_client.get_task_monitor().wait_for_status(
                vapp_resource.Tasks.Task[0])
            tags = {}
            tags['cse.cluster.id'] = self.cluster_id
            tags['cse.version'] = pkg_resources.require(
                'container-service-extension')[0].version
            tags['cse.template'] = template['name']
            vapp = VApp(self.tenant_client, href=vapp_resource.get('href'))
            for k, v in tags.items():
                task = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
                self.tenant_client.get_task_monitor().wait_for_status(task)
            self.update_task(TaskStatus.RUNNING,
                             message='Creating master node for %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()

            server_config = get_server_runtime_config()
            try:
                add_nodes(1, template, TYPE_MASTER, server_config,
                          self.tenant_client, org, vdc, vapp, self.body)
            except Exception as e:
                raise MasterNodeCreationError(
                    "Error while adding master node:", str(e))

            self.update_task(TaskStatus.RUNNING,
                             message='Initializing cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()
            init_cluster(server_config, vapp, template)
            master_ip = get_master_ip(server_config, vapp, template)
            task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                     master_ip)
            self.tenant_client.get_task_monitor().wait_for_status(task)
            if self.body['node_count'] > 0:
                self.update_task(TaskStatus.RUNNING,
                                 message='Creating %s node(s) for %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                try:
                    add_nodes(self.body['node_count'], template, TYPE_NODE,
                              server_config, self.tenant_client, org, vdc,
                              vapp, self.body)
                except Exception as e:
                    raise WorkerNodeCreationError(
                        "Error while creating worker node:", str(e))

                self.update_task(TaskStatus.RUNNING,
                                 message='Adding %s node(s) to %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                vapp.reload()
                join_cluster(server_config, vapp, template)
            if self.body['enable_nfs']:
                self.update_task(TaskStatus.RUNNING,
                                 message='Creating NFS node for %s(%s)' %
                                 (self.cluster_name, self.cluster_id))
                try:
                    add_nodes(1, template, TYPE_NFS, server_config,
                              self.tenant_client, org, vdc, vapp, self.body)
                except Exception as e:
                    raise NFSNodeCreationError(
                        "Error while creating NFS node:", str(e))

            self.update_task(TaskStatus.SUCCESS,
                             message='Created cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
        except (MasterNodeCreationError, WorkerNodeCreationError,
                NFSNodeCreationError, ClusterJoiningError,
                ClusterInitializationError, ClusterOperationError) as e:
            self._report_task_error(e)
            raise e
        except Exception as e:
            self._report_task_error(e)
        finally:
            self._disconnect_sys_admin()

    @exception_handler
    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def delete_cluster(self):
        """Look up the cluster, create the task and start the deletion.

        :return: (dict): cluster name and task href, with status ACCEPTED.

        :raises CseServerError: unless exactly one cluster has that name.
        """
        result = {}
        result['body'] = {}
        LOGGER.debug(f"About to delete cluster with name: {self.body['name']}")

        self.cluster_name = self.body['name']
        self._connect_tenant()
        self._connect_sys_admin()
        self.op = OP_DELETE_CLUSTER
        clusters = load_from_metadata(self.tenant_client,
                                      name=self.cluster_name)
        if len(clusters) != 1:
            raise CseServerError(f"Cluster {self.cluster_name} not found.")
        self.cluster = clusters[0]
        self.cluster_id = self.cluster['cluster_id']
        self.update_task(TaskStatus.RUNNING,
                         message='Deleting cluster %s(%s)' %
                         (self.cluster_name, self.cluster_id))
        self.daemon = True
        self.start()
        response_body = {}
        response_body['cluster_name'] = self.cluster_name
        response_body['task_href'] = self.task_resource.get('href')
        result['body'] = response_body
        result['status_code'] = ACCEPTED
        return result

    def delete_cluster_thread(self):
        """Force-delete the cluster vApp and finish the task."""
        LOGGER.debug('About to delete cluster with name: %s',
                     self.cluster_name)
        try:
            vdc = VDC(self.tenant_client, href=self.cluster['vdc_href'])
            task = vdc.delete_vapp(self.cluster['name'], force=True)
            self.tenant_client.get_task_monitor().wait_for_status(task)
            self.update_task(TaskStatus.SUCCESS,
                             message='Deleted cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(e))
        finally:
            self._disconnect_sys_admin()

    @exception_handler
    def get_cluster_config(self, cluster_name):
        """Return the configuration of the named cluster.

        :param str cluster_name: name of the cluster.

        :raises CseServerError: unless exactly one cluster has that name.
        """
        result = {}
        self._connect_tenant()
        clusters = load_from_metadata(self.tenant_client, name=cluster_name)
        if len(clusters) != 1:
            raise CseServerError('Cluster \'%s\' not found' % cluster_name)
        vapp = VApp(self.tenant_client, href=clusters[0]['vapp_href'])
        template = self.get_template(name=clusters[0]['template'])
        server_config = get_server_runtime_config()
        result['body'] = get_cluster_config(server_config, vapp,
                                            template['admin_password'])
        result['status_code'] = OK
        return result

    @exception_handler
    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def create_nodes(self):
        """Validate the request, create the task and start adding nodes.

        :return: (dict): cluster name and task href, with status ACCEPTED.

        :raises CseServerError: if the node count is below 1 or the cluster
            is not found.
        """
        result = {'body': {}}
        self.cluster_name = self.body['name']
        # Every piece needs its own f prefix: adjacent literals are joined
        # at compile time, but only f-prefixed pieces are interpolated.
        LOGGER.debug(f"About to add {self.body['node_count']} nodes to cluster"
                     f" {self.cluster_name} on VDC {self.body['vdc']}, "
                     f"sp={self.body['storage_profile']}")
        if self.body['node_count'] < 1:
            raise CseServerError(f"Invalid node count: "
                                 f"{self.body['node_count']}.")
        self._connect_tenant()
        self._connect_sys_admin()
        clusters = load_from_metadata(self.tenant_client,
                                      name=self.cluster_name)
        if len(clusters) != 1:
            raise CseServerError('Cluster \'%s\' not found.' %
                                 self.cluster_name)
        self.cluster = clusters[0]
        self.op = OP_CREATE_NODES
        self.cluster_id = self.cluster['cluster_id']
        self.update_task(
            TaskStatus.RUNNING,
            message=f"Adding {self.body['node_count']} node(s) to cluster "
                    f"{self.cluster_name}({self.cluster_id})")
        self.daemon = True
        self.start()
        response_body = {}
        response_body['cluster_name'] = self.cluster_name
        response_body['task_href'] = self.task_resource.get('href')
        result['body'] = response_body
        result['status_code'] = ACCEPTED
        return result

    @rollback
    def create_nodes_thread(self):
        """Create the requested nodes; worker nodes also join the cluster.

        A ``NodeCreationError`` is reported on the task and re-raised (to
        the ``rollback`` decorator); any other error is only reported.
        """
        LOGGER.debug('About to add nodes to cluster with name: %s',
                     self.cluster_name)
        try:
            server_config = get_server_runtime_config()
            org_resource = self.tenant_client.get_org()
            org = Org(self.tenant_client, resource=org_resource)
            vdc = VDC(self.tenant_client, href=self.cluster['vdc_href'])
            vapp = VApp(self.tenant_client, href=self.cluster['vapp_href'])
            template = self.get_template()
            self.update_task(
                TaskStatus.RUNNING,
                message='Creating %s node(s) for %s(%s)' %
                (self.body['node_count'], self.cluster_name,
                 self.cluster_id))
            new_nodes = add_nodes(self.body['node_count'], template,
                                  self.body['node_type'], server_config,
                                  self.tenant_client, org, vdc, vapp,
                                  self.body)
            if self.body['node_type'] == TYPE_NFS:
                self.update_task(TaskStatus.SUCCESS,
                                 message='Created %s node(s) for %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
            elif self.body['node_type'] == TYPE_NODE:
                self.update_task(TaskStatus.RUNNING,
                                 message='Adding %s node(s) to %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                target_nodes = [spec['target_vm_name']
                                for spec in new_nodes['specs']]
                vapp.reload()
                join_cluster(server_config, vapp, template, target_nodes)
                self.update_task(
                    TaskStatus.SUCCESS,
                    message='Added %s node(s) to cluster %s(%s)' %
                    (self.body['node_count'], self.cluster_name,
                     self.cluster_id))
        except NodeCreationError as e:
            self._report_task_error(e)
            raise
        except Exception as e:
            self._report_task_error(e)
        finally:
            self._disconnect_sys_admin()

    @exception_handler
    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def delete_nodes(self):
        """Validate the request, create the task and start deleting nodes.

        :return: (dict): cluster name and task href, with status ACCEPTED.

        :raises CseServerError: if the node list is empty, names a master
            node, or the cluster is not found.
        """
        result = {'body': {}}
        self.cluster_name = self.body['name']
        LOGGER.debug(f"About to delete nodes from cluster with name: "
                     f"{self.body['name']}")

        if len(self.body['nodes']) < 1:
            raise CseServerError(f"Invalid list of nodes: "
                                 f"{self.body['nodes']}.")
        for node in self.body['nodes']:
            if node.startswith(TYPE_MASTER):
                raise CseServerError('Can\'t delete a master node: \'%s\'.' %
                                     node)
        self._connect_tenant()
        self._connect_sys_admin()
        clusters = load_from_metadata(self.tenant_client,
                                      name=self.cluster_name)
        if len(clusters) != 1:
            raise CseServerError('Cluster \'%s\' not found.' %
                                 self.cluster_name)
        self.cluster = clusters[0]
        self.op = OP_DELETE_NODES
        self.cluster_id = self.cluster['cluster_id']
        self.update_task(
            TaskStatus.RUNNING,
            message=f"Deleting {len(self.body['nodes'])} node(s) from cluster "
                    f"{self.cluster_name}({self.cluster_id})")
        self.daemon = True
        self.start()
        response_body = {}
        response_body['cluster_name'] = self.cluster_name
        response_body['task_href'] = self.task_resource.get('href')
        result['body'] = response_body
        result['status_code'] = ACCEPTED
        return result

    def delete_nodes_thread(self):
        """Remove the nodes from the cluster, then undeploy and delete them.

        A failure of ``delete_nodes_from_cluster`` or of an individual
        undeploy is only logged; the VMs are deleted regardless.
        """
        LOGGER.debug('About to delete nodes from cluster with name: %s',
                     self.cluster_name)
        try:
            vapp = VApp(self.tenant_client, href=self.cluster['vapp_href'])
            template = self.get_template()
            self.update_task(
                TaskStatus.RUNNING,
                message='Deleting %s node(s) from %s(%s)' %
                (len(self.body['nodes']), self.cluster_name,
                 self.cluster_id))
            try:
                server_config = get_server_runtime_config()
                delete_nodes_from_cluster(server_config, vapp, template,
                                          self.body['nodes'],
                                          self.body['force'])
            except Exception:
                LOGGER.error(f"Couldn't delete node {self.body['nodes']} from "
                             f"cluster:{self.cluster_name}")
            self.update_task(
                TaskStatus.RUNNING,
                message='Undeploying %s node(s) for %s(%s)' %
                (len(self.body['nodes']), self.cluster_name,
                 self.cluster_id))
            for vm_name in self.body['nodes']:
                vm = VM(self.tenant_client, resource=vapp.get_vm(vm_name))
                try:
                    task = vm.undeploy()
                    self.tenant_client.get_task_monitor().wait_for_status(task)
                except Exception:
                    LOGGER.warning('couldn\'t undeploy VM %s' % vm_name)
            self.update_task(
                TaskStatus.RUNNING,
                message='Deleting %s VM(s) for %s(%s)' %
                (len(self.body['nodes']), self.cluster_name,
                 self.cluster_id))
            task = vapp.delete_vms(self.body['nodes'])
            self.tenant_client.get_task_monitor().wait_for_status(task)
            # Nodes are removed *from* the cluster; the previous wording
            # ("to cluster") was copied from the add-nodes message.
            self.update_task(
                TaskStatus.SUCCESS,
                message='Deleted %s node(s) from cluster %s(%s)' %
                (len(self.body['nodes']), self.cluster_name,
                 self.cluster_id))
        except Exception as e:
            self._report_task_error(e)
        finally:
            self._disconnect_sys_admin()

    @exception_handler
    def enable_ovdc_for_kubernetes(self):
        """Enable ovdc for k8-cluster deployment on given container provider.

        :return: result object

        :rtype: dict

        :raises CseServerError: if the user is not system administrator.
        """
        result = dict()
        self._connect_tenant()
        if not self.tenant_client.is_sysadmin():
            raise CseServerError("Unauthorized Operation")

        ovdc_cache = OvdcCache(self.tenant_client)
        task = ovdc_cache.set_ovdc_container_provider_metadata(
            self.body['ovdc_name'],
            ovdc_id=self.body.get('ovdc_id', None),
            container_provider=self.body.get('container_provider', None),
            pks_plans=self.body['pks_plans'],
            org_name=self.body.get('org_name', None))
        response_body = dict()
        response_body['ovdc_name'] = self.body['ovdc_name']
        response_body['task_href'] = task.get('href')
        result['body'] = response_body
        result['status_code'] = ACCEPTED
        return result

    @exception_handler
    def ovdc_info_for_kubernetes(self):
        """Info on ovdc for k8s deployment on the given container provider.

        :return: result object

        :rtype: dict

        :raises CseServerError: if the user is not system administrator.
        """
        self._connect_tenant()
        if not self.tenant_client.is_sysadmin():
            raise CseServerError("Unauthorized Operation")

        ovdc_cache = OvdcCache(self.tenant_client)
        metadata = ovdc_cache.get_ovdc_container_provider_metadata(
            self.body.get('ovdc_name', None),
            ovdc_id=self.body.get('ovdc_id', None),
            org_name=self.body.get('org_name', None))
        # remove username, secret from sending to client
        metadata.pop('username', None)
        metadata.pop('secret', None)
        result = dict()
        result['status_code'] = OK
        result['body'] = metadata
        return result

    def node_rollback(self, node_list):
        """Rollback for node creation failure.

        :param list node_list: faulty nodes to be deleted
        """
        LOGGER.info(f"About to rollback nodes from cluster with name: "
                    f"{self.cluster_name}")
        LOGGER.info(f"Node list to be deleted:{node_list}")
        vapp = VApp(self.tenant_client, href=self.cluster['vapp_href'])
        template = self.get_template()
        try:
            server_config = get_server_runtime_config()
            delete_nodes_from_cluster(server_config, vapp, template,
                                      node_list, force=True)
        except Exception:
            LOGGER.warning(f"Couldn't delete node {node_list} from cluster:"
                           f"{self.cluster_name}")
        for vm_name in node_list:
            vm = VM(self.tenant_client, resource=vapp.get_vm(vm_name))
            try:
                vm.undeploy()
            except Exception:
                LOGGER.warning(f"Couldn't undeploy VM {vm_name}")
        vapp.delete_vms(node_list)
        LOGGER.info(f"Successfully deleted nodes: {node_list}")

    def cluster_rollback(self):
        """Rollback for cluster creation failure."""
        LOGGER.info(f"About to rollback cluster with name: "
                    f"{self.cluster_name}")
        self._connect_tenant()
        clusters = load_from_metadata(self.tenant_client,
                                      name=self.cluster_name)
        if len(clusters) != 1:
            LOGGER.debug('Cluster %s not found.' % self.cluster_name)
            return
        self.cluster = clusters[0]
        vdc = VDC(self.tenant_client, href=self.cluster['vdc_href'])
        vdc.delete_vapp(self.cluster['name'], force=True)
        LOGGER.info(f"Successfully deleted cluster: {self.cluster_name}")
def _reload_templates_async(op_ctx, task_href):
    """Reload native and TKG template definitions, tracking a vCD task.

    Native templates are re-read from the catalog unless the server runs in
    "no VC communication" mode, in which case the native template list is
    emptied. TKG templates are always re-read. Each step is recorded on the
    task identified by ``task_href``; the task ends in SUCCESS, or in ERROR
    if anything raises after the task objects were set up.
    ``op_ctx.end()`` is always called.

    :param op_ctx: operation context; supplies the user context and is
        ended when the reload finishes.
    :param str task_href: href of the task to update.
    """
    user_context = None
    task = None
    user_href = None

    def _update_task(status, operation, **extra):
        # All updates share the same owner/user details. The enclosing
        # variables are looked up when this runs, i.e. after assignment.
        task.update(status=status.value,
                    namespace='vcloud.cse',
                    operation=operation,
                    operation_name='template reload',
                    details='',
                    progress=None,
                    owner_href=user_context.org_href,
                    owner_name=user_context.org_name,
                    owner_type='application/vnd.vmware.vcloud.org+xml',
                    user_href=user_href,
                    user_name=user_context.name,
                    org_href=user_context.org_href,
                    task_href=task_href,
                    **extra)

    try:
        user_context = op_ctx.get_user_context(api_version=None)
        user_client = user_context.client
        org = vcd_utils.get_org(user_client, user_context.org_name)
        user_href = org.get_user(user_context.name).get('href')
        task = Task(user_client)

        server_config = server_utils.get_server_runtime_config()
        if not server_utils.is_no_vc_communication_mode():
            native_templates = \
                template_reader.read_native_template_definition_from_catalog(
                    config=server_config
                )
            server_config.set_value_at('broker.templates', native_templates)
            _update_task(TaskStatus.RUNNING,
                         "Finished reloading native templates.")
        else:
            msg = "Skipping loading k8s template definition from catalog " \
                  "since `No communication with VCenter` mode is on."
            logger.SERVER_LOGGER.info(msg)
            server_config.set_value_at('broker.templates', [])
            _update_task(TaskStatus.RUNNING, msg)

        _update_task(TaskStatus.RUNNING, "Reloading TKG templates.")
        tkgm_templates = \
            template_reader.read_tkgm_template_definition_from_catalog(
                config=server_config
            )
        server_config.set_value_at('broker.tkgm_templates', tkgm_templates)
        _update_task(TaskStatus.SUCCESS, "Finished reloading all templates.")
    except Exception as err:
        msg = "Error reloading templates."
        logger.SERVER_LOGGER.error(msg, exc_info=True)
        # The task can only be updated once these three have been set up.
        if task and user_context and user_href:
            # Pass the exception text so the failed task says what went
            # wrong rather than only the generic message.
            _update_task(TaskStatus.ERROR, msg, error_message=str(err))
    finally:
        op_ctx.end()
class DefaultBroker(threading.Thread):
    """Broker that serves cluster and node requests against vCD.

    Read-only requests (list/info/config) are answered synchronously.
    Create/delete requests store the request on the instance (``op``,
    ``body``, ``headers``, ``cluster_name``, ...), record a RUNNING task,
    start this object as a daemon thread and return ACCEPTED; ``run`` then
    dispatches to the matching ``*_thread`` method based on ``self.op``.
    """

    def __init__(self, config):
        """Store the configuration and cache the ``config['vcd']`` settings.

        :param config: (dict): Server configuration; the keys ``host``,
            ``username``, ``password``, ``api_version``, ``verify`` and
            ``log`` are read from ``config['vcd']``.
        """
        super().__init__()
        self.config = config
        self.host = config['vcd']['host']
        self.username = config['vcd']['username']
        self.password = config['vcd']['password']
        self.version = config['vcd']['api_version']
        self.verify = config['vcd']['verify']
        self.log = config['vcd']['log']

    def _connect_sysadmin(self):
        """Create ``self.client_sysadmin`` and set system org credentials.

        When certificate verification is disabled a warning is logged and
        urllib3 warnings are silenced.
        """
        if not self.verify:
            LOGGER.warning('InsecureRequestWarning: '
                           'Unverified HTTPS request is being made. '
                           'Adding certificate verification is strongly '
                           'advised.')
            requests.packages.urllib3.disable_warnings()
        self.client_sysadmin = Client(uri=self.host,
                                      api_version=self.version,
                                      verify_ssl_certs=self.verify,
                                      log_headers=True,
                                      log_bodies=True)
        credentials = BasicLoginCredentials(self.username,
                                            SYSTEM_ORG_NAME,
                                            self.password)
        self.client_sysadmin.set_credentials(credentials)

    def _connect_tenant(self, headers):
        """Create ``self.client_tenant`` from the caller's request headers.

        The session token is read from ``x-vcloud-authorization`` and the
        API version is the text following ``version=`` in the ``Accept``
        header.

        :param headers: Request headers (must support ``get``).

        :return: (dict): ``user_name``, ``user_id``, ``org_name`` and
            ``org_href`` of the rehydrated session.
        """
        token = headers.get('x-vcloud-authorization')
        accept_header = headers.get('Accept')
        # NOTE(review): fails with AttributeError/IndexError when the Accept
        # header is absent or carries no 'version=' part.
        version = accept_header.split('version=')[1]
        self.client_tenant = Client(uri=self.host,
                                    api_version=version,
                                    verify_ssl_certs=self.verify,
                                    log_headers=True,
                                    log_bodies=True)
        session = self.client_tenant.rehydrate_from_token(token)
        return {
            'user_name': session.get('user'),
            'user_id': session.get('userId'),
            'org_name': session.get('org'),
            'org_href': self.client_tenant._get_wk_endpoint(
                _WellKnownEndpoint.LOGGED_IN_ORG)
        }

    def _to_message(self, e):
        """Wrap an exception's message in a ``{'message': ...}`` dict.

        :param e: Exception (or any object); its ``message`` attribute is
            used when present, otherwise ``str(e)``.

        :return: (dict): Dictionary with the single key ``message``.
        """
        if hasattr(e, 'message'):
            return {'message': e.message}
        else:
            return {'message': str(e)}

    def update_task(self, status, message=None, error_message=None):
        """Create or update the task that tracks the current operation.

        The ``Task`` helper is created lazily on first use. The first call
        creates a new task (no href yet); later calls update the task whose
        href was returned by the previous call.

        :param status: Status enum member; its ``value`` is sent.
        :param message: (str): Operation text; defaults to
            ``OP_MESSAGE[self.op]``.
        :param error_message: (str): Optional error text for the task.
        """
        if not hasattr(self, 'task'):
            self.task = Task(self.client_sysadmin)
        if message is None:
            message = OP_MESSAGE[self.op]
        if hasattr(self, 'task_resource'):
            task_href = self.task_resource.get('href')
        else:
            task_href = None
        self.task_resource = self.task.update(
            status.value,
            'vcloud.cse',
            message,
            self.op,
            '',
            None,
            'urn:cse:cluster:%s' % self.cluster_id,
            self.cluster_name,
            'application/vcloud.cse.cluster+xml',
            self.tenant_info['user_id'],
            self.tenant_info['user_name'],
            org_href=self.tenant_info['org_href'],
            task_href=task_href,
            error_message=error_message)

    def is_valid_name(self, name):
        """Validate the cluster name against the host name pattern.

        A single trailing dot is ignored. Every dot-separated label must be
        1 to 63 characters of letters, digits or hyphens and must neither
        start nor end with a hyphen.

        :param name: (str): Candidate cluster name.

        :return: (bool): True if the name is valid; False otherwise,
            including for an empty name.
        """
        # An empty name has no last character to inspect and is never valid.
        if not name:
            return False
        if len(name) > MAX_HOST_NAME_LENGTH:
            return False
        if name[-1] == '.':
            name = name[:-1]
        # Raw string so that '\d' reaches the regex engine unchanged.
        allowed = re.compile(r"(?!-)[A-Z\d-]{1,63}(?<!-)$", re.IGNORECASE)
        return all(allowed.match(x) for x in name.split("."))

    def get_template(self, name=None):
        """Look up a template definition in the broker configuration.

        :param name: (str): Template name. When omitted, the ``template``
            entry of the request body is used if set, otherwise the
            configured default template.

        :return: (dict): The matching entry of
            ``config['broker']['templates']``.

        :raises Exception: If no configured template has that name.
        """
        if name is None:
            if 'template' in self.body and self.body['template'] is not None:
                name = self.body['template']
            else:
                name = self.config['broker']['default_template']
        for template in self.config['broker']['templates']:
            if template['name'] == name:
                return template
        raise Exception('Template %s not found' % name)

    def run(self):
        """Thread entry point: run the worker matching ``self.op``."""
        LOGGER.debug('thread started op=%s' % self.op)
        if self.op == OP_CREATE_CLUSTER:
            self.create_cluster_thread()
        elif self.op == OP_DELETE_CLUSTER:
            self.delete_cluster_thread()
        elif self.op == OP_CREATE_NODES:
            self.create_nodes_thread()
        elif self.op == OP_DELETE_NODES:
            self.delete_nodes_thread()

    @exception_handler
    def list_clusters(self, headers, body):
        """List the clusters loaded from metadata for the tenant.

        :param headers: Request headers
        :param body: Request body (not used)

        :return: (dict): ``body`` holds the clusters, ``status_code`` is OK.
        """
        result = {}
        result['body'] = []
        result['status_code'] = OK
        self._connect_tenant(headers)
        clusters = load_from_metadata(self.client_tenant)
        result['body'] = clusters
        return result

    @exception_handler
    def get_cluster_info(self, name, headers, body):
        """Get the info of the cluster, including its nodes.

        Each VM of the cluster vApp is added to the cluster's
        ``master_nodes``, ``nodes`` or ``nfs_nodes`` list according to the
        prefix of its name.

        :param name: (str): Name of the cluster
        :param headers: Request headers
        :param body: Request body (not used)

        :return: (dict): ``body`` holds the info of the first matching
            cluster, ``status_code`` is OK.

        :raises CseServerError: If no cluster with that name is found.
        """
        result = {}
        result['body'] = []
        result['status_code'] = OK
        self._connect_tenant(headers)
        clusters = load_from_metadata(self.client_tenant, name=name)
        if len(clusters) == 0:
            raise CseServerError('Cluster \'%s\' not found.' % name)
        vapp = VApp(self.client_tenant, href=clusters[0]['vapp_href'])
        vms = vapp.get_all_vms()
        for vm in vms:
            node_info = {'name': vm.get('name'), 'ipAddress': ''}
            try:
                node_info['ipAddress'] = vapp.get_primary_ip(vm.get('name'))
            except Exception:
                # Best effort: a node without an IP is reported with ''.
                LOGGER.debug('cannot get ip address for node %s' %
                             vm.get('name'))
            if vm.get('name').startswith(TYPE_MASTER):
                clusters[0].get('master_nodes').append(node_info)
            elif vm.get('name').startswith(TYPE_NODE):
                clusters[0].get('nodes').append(node_info)
            elif vm.get('name').startswith(TYPE_NFS):
                clusters[0].get('nfs_nodes').append(node_info)
        result['body'] = clusters[0]
        return result

    @exception_handler
    def get_node_info(self, cluster_name, node_name, headers):
        """Get the info of a given node in the cluster.

        :param cluster_name: (str): Name of the cluster
        :param node_name: (str): Name of the node
        :param headers: Request headers

        :return: (dict): ``body`` holds the info of the node (for NFS nodes
            including its ``exports``), ``status_code`` is OK.

        :raises CseServerError: If the cluster or the node is not found.
        """
        result = {}
        result['body'] = []
        result['status_code'] = OK
        self._connect_tenant(headers)
        clusters = load_from_metadata(self.client_tenant, name=cluster_name)
        if len(clusters) == 0:
            raise CseServerError('Cluster \'%s\' not found.' % cluster_name)
        vapp = VApp(self.client_tenant, href=clusters[0]['vapp_href'])
        vms = vapp.get_all_vms()
        node_info = None
        for vm in vms:
            if (node_name == vm.get('name')):
                node_info = {
                    'name': vm.get('name'),
                    'numberOfCpus': '',
                    'memoryMB': '',
                    'status': VCLOUD_STATUS_MAP.get(int(vm.get('status'))),
                    'ipAddress': ''
                }
                if hasattr(vm, 'VmSpecSection'):
                    node_info['numberOfCpus'] = vm.VmSpecSection.NumCpus.text
                    node_info[
                        'memoryMB'] = \
                        vm.VmSpecSection.MemoryResourceMb.Configured.text
                try:
                    node_info['ipAddress'] = vapp.get_primary_ip(
                        vm.get('name'))
                except Exception:
                    # Best effort: a node without an IP is reported with ''.
                    LOGGER.debug('cannot get ip address '
                                 'for node %s' % vm.get('name'))
                if vm.get('name').startswith(TYPE_MASTER):
                    node_info['node_type'] = 'master'
                elif vm.get('name').startswith(TYPE_NODE):
                    node_info['node_type'] = 'node'
                elif vm.get('name').startswith(TYPE_NFS):
                    node_info['node_type'] = 'nfsd'
                    exports = self._get_nfs_exports(node_info['ipAddress'],
                                                    vapp,
                                                    vm)
                    node_info['exports'] = exports
        if node_info is None:
            raise CseServerError('Node \'%s\' not found in cluster \'%s\'' %
                                 (node_name, cluster_name))
        result['body'] = node_info
        return result

    def _get_nfs_exports(self, ip, vapp, node):
        """Get the exports from remote NFS server (helper method).

        Runs ``showmount -e <ip>`` on the given node and returns the first
        whitespace-separated field of each output line, skipping the first
        line and the last element of the newline split.

        :param ip: (str): IP address of the NFS server
        :param vapp: (pyvcloud.vcd.vapp.VApp): The vApp or cluster
         to which node belongs
        :param node: (`lxml.objectify.StringElement`) object
         representing the vm resource.

        :return: (List): List of exports
        """
        # TODO(right template) find a right way to retrieve
        # the template from which nfs node was created.
        template = self.config['broker']['templates'][0]
        script = '#!/usr/bin/env bash\nshowmount -e %s' % ip
        result = execute_script_in_nodes(self.config,
                                         vapp,
                                         template['admin_password'],
                                         script,
                                         nodes=[node],
                                         check_tools=False)
        lines = result[0][1].content.decode().split('\n')
        # Skip the first line (presumably the showmount header) and the last
        # element of the split. Blank lines are ignored so that they cannot
        # raise IndexError when taking the first field.
        return [line.split()[0] for line in lines[1:-1] if line.strip()]

    @exception_handler
    def create_cluster(self, headers, body):
        """Validate a create-cluster request and start it in the background.

        :param headers: Request headers
        :param body: (dict): Request body; ``name``, ``vdc``,
            ``node_count`` and ``storage_profile`` are read here.

        :return: (dict): ``body`` holds the cluster ``name``, the generated
            ``cluster_id`` and the ``task_href``; ``status_code`` is
            ACCEPTED.

        :raises CseServerError: If the cluster name is not valid.
        """
        result = {}
        result['body'] = {}
        cluster_name = body['name']
        vdc_name = body['vdc']
        node_count = body['node_count']
        LOGGER.debug('about to create cluster %s on %s with %s nodes, sp=%s',
                     cluster_name, vdc_name, node_count,
                     body['storage_profile'])
        result['body'] = {
            'message': 'can\'t create cluster \'%s\'' % cluster_name
        }

        if not self.is_valid_name(cluster_name):
            raise CseServerError(f"Invalid cluster name \'{cluster_name}\'")
        self.tenant_info = self._connect_tenant(headers)
        self.headers = headers
        self.body = body
        self.cluster_name = cluster_name
        self.cluster_id = str(uuid.uuid4())
        self.op = OP_CREATE_CLUSTER
        self._connect_sysadmin()
        self.update_task(TaskStatus.RUNNING,
                         message='Creating cluster %s(%s)' %
                         (cluster_name, self.cluster_id))
        self.daemon = True
        self.start()
        response_body = {}
        response_body['name'] = self.cluster_name
        response_body['cluster_id'] = self.cluster_id
        response_body['task_href'] = self.task_resource.get('href')
        result['body'] = response_body
        result['status_code'] = ACCEPTED
        return result

    @rollback
    def create_cluster_thread(self):
        """Create the cluster vApp, its master, worker and NFS nodes.

        Progress and the final outcome are reported through
        ``update_task``. Node creation, cluster init/join and cluster
        operation errors are reported on the task and re-raised; any other
        exception is reported on the task and swallowed.
        """
        network_name = self.body['network']
        try:
            clusters = load_from_metadata(self.client_tenant,
                                          name=self.cluster_name)
            if len(clusters) != 0:
                raise ClusterAlreadyExistsError(
                    f'Cluster {self.cluster_name} already exists.')
            org_resource = self.client_tenant.get_org()
            org = Org(self.client_tenant, resource=org_resource)
            vdc_resource = org.get_vdc(self.body['vdc'])
            vdc = VDC(self.client_tenant, resource=vdc_resource)
            template = self.get_template()
            self.update_task(TaskStatus.RUNNING,
                             message='Creating cluster vApp %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            try:
                vapp_resource = vdc.create_vapp(self.cluster_name,
                                                description='cluster %s' %
                                                self.cluster_name,
                                                network=network_name,
                                                fence_mode='bridged')
            except Exception as e:
                raise ClusterOperationError('Error while creating vApp:',
                                            str(e)) from e

            self.client_tenant.get_task_monitor().wait_for_status(
                vapp_resource.Tasks.Task[0])
            tags = {}
            tags['cse.cluster.id'] = self.cluster_id
            tags['cse.version'] = pkg_resources.require(
                'container-service-extension')[0].version
            tags['cse.template'] = template['name']
            vapp = VApp(self.client_tenant, href=vapp_resource.get('href'))
            for k, v in tags.items():
                task = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
                self.client_tenant.get_task_monitor().wait_for_status(task)
            self.update_task(TaskStatus.RUNNING,
                             message='Creating master node for %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()

            try:
                add_nodes(1, template, TYPE_MASTER, self.config,
                          self.client_tenant, org, vdc, vapp, self.body)
            except Exception as e:
                raise MasterNodeCreationError(
                    "Error while adding master node:", str(e)) from e

            self.update_task(TaskStatus.RUNNING,
                             message='Initializing cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()
            init_cluster(self.config, vapp, template)
            master_ip = get_master_ip(self.config, vapp, template)
            task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                     master_ip)
            self.client_tenant.get_task_monitor().wait_for_status(task)
            if self.body['node_count'] > 0:
                self.update_task(TaskStatus.RUNNING,
                                 message='Creating %s node(s) for %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                try:
                    add_nodes(self.body['node_count'], template, TYPE_NODE,
                              self.config, self.client_tenant, org, vdc,
                              vapp, self.body)
                except Exception as e:
                    raise WorkerNodeCreationError(
                        "Error while creating worker node:", str(e)) from e

                self.update_task(TaskStatus.RUNNING,
                                 message='Adding %s node(s) to %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                vapp.reload()
                join_cluster(self.config, vapp, template)
            if self.body['enable_nfs']:
                self.update_task(TaskStatus.RUNNING,
                                 message='Creating NFS node for %s(%s)' %
                                 (self.cluster_name, self.cluster_id))
                try:
                    add_nodes(1, template, TYPE_NFS, self.config,
                              self.client_tenant, org, vdc, vapp, self.body)
                except Exception as e:
                    raise NFSNodeCreationError(
                        "Error while creating NFS node:", str(e)) from e

            self.update_task(TaskStatus.SUCCESS,
                             message='Created cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
        except (MasterNodeCreationError, WorkerNodeCreationError,
                NFSNodeCreationError, ClusterJoiningError,
                ClusterInitializationError, ClusterOperationError) as e:
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            self.update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION])
            # Re-raised so that it propagates out to the @rollback decorator.
            raise
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            self.update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION])

    @exception_handler
    def delete_cluster(self, headers, body):
        """Validate a delete-cluster request and start it in the background.

        :param headers: Request headers
        :param body: (dict): Request body; ``name`` is read here.

        :return: (dict): ``body`` holds ``cluster_name`` and ``task_href``;
            ``status_code`` is ACCEPTED.

        :raises CseServerError: If the name does not match exactly one
            cluster.
        """
        result = {}
        result['body'] = {}
        LOGGER.debug('about to delete cluster with name: %s' % body['name'])
        result['status_code'] = INTERNAL_SERVER_ERROR

        self.cluster_name = body['name']
        self.tenant_info = self._connect_tenant(headers)
        self.headers = headers
        self.body = body
        self.op = OP_DELETE_CLUSTER
        self._connect_sysadmin()
        clusters = load_from_metadata(self.client_tenant,
                                      name=self.cluster_name)
        if len(clusters) != 1:
            raise CseServerError('Cluster %s not found.' % self.cluster_name)
        self.cluster = clusters[0]
        self.cluster_id = self.cluster['cluster_id']
        self.update_task(TaskStatus.RUNNING,
                         message='Deleting cluster %s(%s)' %
                         (self.cluster_name, self.cluster_id))
        self.daemon = True
        self.start()
        response_body = {}
        response_body['cluster_name'] = self.cluster_name
        response_body['task_href'] = self.task_resource.get('href')
        result['body'] = response_body
        result['status_code'] = ACCEPTED
        return result

    def delete_cluster_thread(self):
        """Delete the cluster vApp and report the outcome on the task.

        Failures are logged and reported on the task; they are not raised.
        """
        LOGGER.debug('about to delete cluster with name: %s',
                     self.cluster_name)
        try:
            vdc = VDC(self.client_tenant, href=self.cluster['vdc_href'])
            task = vdc.delete_vapp(self.cluster['name'], force=True)
            self.client_tenant.get_task_monitor().wait_for_status(task)
            self.update_task(TaskStatus.SUCCESS,
                             message='Deleted cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(e))

    @exception_handler
    def get_cluster_config(self, cluster_name, headers):
        """Get the configuration of the named cluster.

        :param cluster_name: (str): Name of the cluster
        :param headers: Request headers

        :return: (dict): ``body`` holds the value returned by the
            module-level ``get_cluster_config`` helper; ``status_code`` is
            OK.

        :raises CseServerError: If the name does not match exactly one
            cluster.
        """
        result = {}
        self._connect_tenant(headers)
        clusters = load_from_metadata(self.client_tenant, name=cluster_name)
        if len(clusters) != 1:
            raise CseServerError('Cluster \'%s\' not found' % cluster_name)
        vapp = VApp(self.client_tenant, href=clusters[0]['vapp_href'])
        template = self.get_template(name=clusters[0]['template'])
        # Inside the method body this name resolves to the module-level
        # function, not to this method.
        result['body'] = get_cluster_config(self.config, vapp,
                                            template['admin_password'])
        result['status_code'] = OK
        return result

    @exception_handler
    def create_nodes(self, headers, body):
        """Validate an add-nodes request and start it in the background.

        :param headers: Request headers
        :param body: (dict): Request body; ``name``, ``node_count``,
            ``vdc`` and ``storage_profile`` are read here.

        :return: (dict): ``body`` holds ``cluster_name`` and ``task_href``;
            ``status_code`` is ACCEPTED.

        :raises CseServerError: If the node count is below 1 or the name
            does not match exactly one cluster.
        """
        result = {'body': {}}
        self.cluster_name = body['name']
        LOGGER.debug('about to add %s nodes to cluster %s on VDC %s, sp=%s',
                     body['node_count'], self.cluster_name, body['vdc'],
                     body['storage_profile'])
        if body['node_count'] < 1:
            raise CseServerError('Invalid node count: %s.' %
                                 body['node_count'])
        self.tenant_info = self._connect_tenant(headers)
        clusters = load_from_metadata(self.client_tenant,
                                      name=self.cluster_name)
        if len(clusters) != 1:
            raise CseServerError('Cluster \'%s\' not found.' %
                                 self.cluster_name)
        self.cluster = clusters[0]
        self.headers = headers
        self.body = body
        self.op = OP_CREATE_NODES
        self._connect_sysadmin()
        self.cluster_id = self.cluster['cluster_id']
        self.update_task(
            TaskStatus.RUNNING,
            message='Adding %s node(s) to cluster %s(%s)' %
            (body['node_count'], self.cluster_name, self.cluster_id))
        self.daemon = True
        self.start()
        response_body = {}
        response_body['cluster_name'] = self.cluster_name
        response_body['task_href'] = self.task_resource.get('href')
        result['body'] = response_body
        result['status_code'] = ACCEPTED
        return result

    @rollback
    def create_nodes_thread(self):
        """Create the requested nodes and, for worker nodes, join them.

        ``NodeCreationError`` is reported on the task and re-raised; any
        other exception is reported on the task and swallowed.
        """
        LOGGER.debug('about to add nodes to cluster with name: %s',
                     self.cluster_name)
        try:
            org_resource = self.client_tenant.get_org()
            org = Org(self.client_tenant, resource=org_resource)
            vdc = VDC(self.client_tenant, href=self.cluster['vdc_href'])
            vapp = VApp(self.client_tenant, href=self.cluster['vapp_href'])
            template = self.get_template()
            self.update_task(
                TaskStatus.RUNNING,
                message='Creating %s node(s) for %s(%s)' %
                (self.body['node_count'], self.cluster_name,
                 self.cluster_id))
            new_nodes = add_nodes(self.body['node_count'], template,
                                  self.body['node_type'], self.config,
                                  self.client_tenant, org, vdc, vapp,
                                  self.body)
            if self.body['node_type'] == TYPE_NFS:
                self.update_task(TaskStatus.SUCCESS,
                                 message='Created %s node(s) for %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
            elif self.body['node_type'] == TYPE_NODE:
                self.update_task(TaskStatus.RUNNING,
                                 message='Adding %s node(s) to %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                # Only the newly created VMs are joined to the cluster.
                target_nodes = []
                for spec in new_nodes['specs']:
                    target_nodes.append(spec['target_vm_name'])
                vapp.reload()
                join_cluster(self.config, vapp, template, target_nodes)
                self.update_task(
                    TaskStatus.SUCCESS,
                    message='Added %s node(s) to cluster %s(%s)' %
                    (self.body['node_count'], self.cluster_name,
                     self.cluster_id))
        except NodeCreationError as e:
            error_obj = error_to_json(e)
            LOGGER.error(traceback.format_exc())
            self.update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION])
            # Re-raised so that it propagates out to the @rollback decorator.
            raise
        except Exception as e:
            error_obj = error_to_json(e)
            LOGGER.error(traceback.format_exc())
            self.update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION])

    @exception_handler
    def delete_nodes(self, headers, body):
        """Validate a delete-nodes request and start it in the background.

        :param headers: Request headers
        :param body: (dict): Request body; ``name`` and ``nodes`` are read
            here.

        :return: (dict): ``body`` holds ``cluster_name`` and ``task_href``;
            ``status_code`` is ACCEPTED.

        :raises CseServerError: If the node list is empty, contains a
            master node, or the name does not match exactly one cluster.
        """
        result = {'body': {}}
        self.cluster_name = body['name']
        LOGGER.debug('about to delete nodes from cluster with name: %s' %
                     body['name'])
        if len(body['nodes']) < 1:
            raise CseServerError('Invalid list of nodes: %s.' %
                                 body['nodes'])
        for node in body['nodes']:
            if node.startswith(TYPE_MASTER):
                raise CseServerError('Can\'t delete a master node: \'%s\'.' %
                                     node)
        self.tenant_info = self._connect_tenant(headers)
        clusters = load_from_metadata(self.client_tenant,
                                      name=self.cluster_name)
        if len(clusters) != 1:
            raise CseServerError('Cluster \'%s\' not found.' %
                                 self.cluster_name)
        self.cluster = clusters[0]
        self.headers = headers
        self.body = body
        self.op = OP_DELETE_NODES
        self._connect_sysadmin()
        self.cluster_id = self.cluster['cluster_id']
        self.update_task(
            TaskStatus.RUNNING,
            message='Deleting %s node(s) from cluster %s(%s)' %
            (len(body['nodes']), self.cluster_name, self.cluster_id))
        self.daemon = True
        self.start()
        response_body = {}
        response_body['cluster_name'] = self.cluster_name
        response_body['task_href'] = self.task_resource.get('href')
        result['body'] = response_body
        result['status_code'] = ACCEPTED
        return result

    def delete_nodes_thread(self):
        """Remove the requested nodes from the cluster and delete their VMs.

        Failing to remove the nodes from the cluster or to undeploy a VM is
        only logged and the deletion continues. Any other failure is logged
        and reported on the task; nothing is raised.
        """
        LOGGER.debug('about to delete nodes from cluster with name: %s',
                     self.cluster_name)
        try:
            vapp = VApp(self.client_tenant, href=self.cluster['vapp_href'])
            template = self.get_template()
            self.update_task(
                TaskStatus.RUNNING,
                message='Deleting %s node(s) from %s(%s)' %
                (len(self.body['nodes']), self.cluster_name,
                 self.cluster_id))
            try:
                delete_nodes_from_cluster(self.config, vapp, template,
                                          self.body['nodes'],
                                          self.body['force'])
            except Exception:
                # Best effort: the VMs are still undeployed and deleted.
                LOGGER.error("Couldn't delete node %s from cluster:%s" %
                             (self.body['nodes'], self.cluster_name))
            self.update_task(
                TaskStatus.RUNNING,
                message='Undeploying %s node(s) for %s(%s)' %
                (len(self.body['nodes']), self.cluster_name,
                 self.cluster_id))
            for vm_name in self.body['nodes']:
                vm = VM(self.client_tenant, resource=vapp.get_vm(vm_name))
                try:
                    task = vm.undeploy()
                    self.client_tenant.get_task_monitor().wait_for_status(
                        task)
                except Exception:
                    LOGGER.warning('couldn\'t undeploy VM %s' % vm_name)
            self.update_task(
                TaskStatus.RUNNING,
                message='Deleting %s VM(s) for %s(%s)' %
                (len(self.body['nodes']), self.cluster_name,
                 self.cluster_id))
            task = vapp.delete_vms(self.body['nodes'])
            self.client_tenant.get_task_monitor().wait_for_status(task)
            self.update_task(
                TaskStatus.SUCCESS,
                message='Deleted %s node(s) to cluster %s(%s)' %
                (len(self.body['nodes']), self.cluster_name,
                 self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(e))

    def node_rollback(self, node_list):
        """Implements rollback for node creation failure

        Failing to remove the nodes from the cluster or to undeploy a VM is
        only logged; the VMs are deleted from the vApp afterwards.

        :param list node_list: faulty nodes to be deleted
        """
        LOGGER.info('About to rollback nodes from cluster with name: %s' %
                    self.cluster_name)
        LOGGER.info('Node list to be deleted:%s' % node_list)
        vapp = VApp(self.client_tenant, href=self.cluster['vapp_href'])
        template = self.get_template()
        try:
            delete_nodes_from_cluster(self.config, vapp, template, node_list,
                                      force=True)
        except Exception:
            LOGGER.warning("Couldn't delete node %s from cluster:%s" %
                           (node_list, self.cluster_name))
        for vm_name in node_list:
            vm = VM(self.client_tenant, resource=vapp.get_vm(vm_name))
            try:
                vm.undeploy()
            except Exception:
                LOGGER.warning("Couldn't undeploy VM %s" % vm_name)
        vapp.delete_vms(node_list)
        LOGGER.info('Successfully deleted nodes: %s' % node_list)

    def cluster_rollback(self):
        """Implements rollback for cluster creation failure

        Does nothing (besides logging) unless the cluster name matches
        exactly one cluster; otherwise the cluster vApp is deleted.
        """
        LOGGER.info('About to rollback cluster with name: %s' %
                    self.cluster_name)
        clusters = load_from_metadata(self.client_tenant,
                                      name=self.cluster_name)
        if len(clusters) != 1:
            LOGGER.debug('Cluster %s not found.' % self.cluster_name)
            return
        self.cluster = clusters[0]
        vdc = VDC(self.client_tenant, href=self.cluster['vdc_href'])
        vdc.delete_vapp(self.cluster['name'], force=True)
        LOGGER.info('Successfully deleted cluster: %s' % self.cluster_name)