def create_cluster_thread(self):
    """Create the cluster vApp, its master node and optional worker nodes.

    Reads 'network', 'vdc' and 'node_count' from ``self.body``. Progress is
    reported through ``self.update_task``. Any ``Exception`` raised along
    the way is logged with its traceback and recorded on the task as
    ``TaskStatus.ERROR``; it is not propagated to the caller.
    """
    try:
        # Read inside the ``try`` so that a request without a 'network'
        # entry is reported on the task like every other failure.
        network_name = self.body['network']
        clusters = load_from_metadata(self.client_tenant,
                                      name=self.cluster_name)
        if len(clusters) != 0:
            raise Exception('Cluster already exists.')
        org_resource = self.client_tenant.get_org()
        org = Org(self.client_tenant, resource=org_resource)
        vdc_resource = org.get_vdc(self.body['vdc'])
        vdc = VDC(self.client_tenant, resource=vdc_resource)
        template = self.get_template()
        self.update_task(TaskStatus.RUNNING, self.op,
                         message='Creating cluster vApp %s(%s)' %
                         (self.cluster_name, self.cluster_id))
        vapp_resource = vdc.create_vapp(
            self.cluster_name,
            description='cluster %s' % self.cluster_name,
            network=network_name,
            fence_mode='bridged')
        # Every terminal status is accepted here so the wait returns as soon
        # as the task finishes; success is verified explicitly below.
        t = self.client_tenant.get_task_monitor().wait_for_status(
            task=vapp_resource.Tasks.Task[0],
            timeout=60,
            poll_frequency=2,
            fail_on_status=None,
            expected_target_statuses=[
                TaskStatus.SUCCESS, TaskStatus.ABORTED, TaskStatus.ERROR,
                TaskStatus.CANCELED
            ],
            callback=None)
        # An explicit check instead of ``assert``: assertions are removed
        # when Python runs with -O, and a failed assert carries no message
        # to put on the task.
        status = t.get('status').lower()
        if status != TaskStatus.SUCCESS.value:
            raise RuntimeError(
                'Creation of vApp for cluster %s ended with status: %s' %
                (self.cluster_name, status))
        tags = {
            'cse.cluster.id': self.cluster_id,
            'cse.version': pkg_resources.require(
                'container-service-extension')[0].version,
            'cse.template': template['name'],
        }
        vapp = VApp(self.client_tenant, href=vapp_resource.get('href'))
        for k, v in tags.items():
            # Wait for each metadata update before issuing the next one.
            t = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
            self.client_tenant.get_task_monitor().wait_for_status(
                task=t,
                timeout=600,
                poll_frequency=5,
                fail_on_status=None,
                expected_target_statuses=[TaskStatus.SUCCESS],
                callback=None)
        self.update_task(TaskStatus.RUNNING, self.op,
                         message='Creating master node for %s(%s)' %
                         (self.cluster_name, self.cluster_id))
        vapp.reload()
        add_nodes(1, template, TYPE_MASTER, self.config,
                  self.client_tenant, org, vdc, vapp, self.body,
                  wait=True)
        self.update_task(TaskStatus.RUNNING, self.op,
                         message='Initializing cluster %s(%s)' %
                         (self.cluster_name, self.cluster_id))
        vapp.reload()
        init_cluster(self.config, vapp, template)
        master_ip = get_master_ip(self.config, vapp, template)
        t = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                              master_ip)
        self.client_tenant.get_task_monitor().wait_for_status(
            task=t,
            timeout=600,
            poll_frequency=5,
            fail_on_status=None,
            expected_target_statuses=[TaskStatus.SUCCESS],
            callback=None)
        if self.body['node_count'] > 0:
            self.update_task(TaskStatus.RUNNING, self.op,
                             message='Creating %s node(s) for %s(%s)' %
                             (self.body['node_count'],
                              self.cluster_name,
                              self.cluster_id))
            add_nodes(self.body['node_count'], template, TYPE_NODE,
                      self.config, self.client_tenant, org, vdc, vapp,
                      self.body, wait=True)
            self.update_task(TaskStatus.RUNNING, self.op,
                             message='Adding %s node(s) to %s(%s)' %
                             (self.body['node_count'],
                              self.cluster_name,
                              self.cluster_id))
            vapp.reload()
            join_cluster(self.config, vapp, template)
        self.update_task(TaskStatus.SUCCESS, self.op,
                         message='Created cluster %s(%s)' %
                         (self.cluster_name, self.cluster_id))
    except Exception as e:
        LOGGER.error(traceback.format_exc())
        self.update_task(TaskStatus.ERROR, self.op, error_message=str(e))
def _create_cluster_async(self, *args,
                          org_name, ovdc_name, cluster_name, cluster_id,
                          template_name, template_revision, num_workers,
                          network_name, num_cpu, mb_memory,
                          storage_profile_name, ssh_key_filepath, enable_nfs,
                          rollback):
    """Create a cluster vApp and populate it with master/worker/NFS nodes.

    Progress and the final outcome are reported through
    ``self._update_task``. Exceptions are logged and recorded on the task as
    ``TaskStatus.ERROR`` instead of being re-raised, and
    ``self.logout_sys_admin_client()`` is always called at the end.

    :param args: accepted but not used.
    :param org_name: name of the org that owns the target VDC.
    :param ovdc_name: name of the VDC in which the vApp is created.
    :param cluster_name: name given to the cluster vApp.
    :param cluster_id: identifier stored in the vApp metadata.
    :param template_name: name passed to ``get_template``.
    :param template_revision: revision passed to ``get_template``.
    :param num_workers: number of worker nodes requested from ``add_nodes``.
    :param network_name: network the vApp and its nodes are attached to.
    :param num_cpu: forwarded to ``add_nodes`` as ``num_cpu``.
    :param mb_memory: forwarded to ``add_nodes`` as ``memory_in_mb``.
    :param storage_profile_name: forwarded to ``add_nodes`` as
        ``storage_profile``.
    :param ssh_key_filepath: forwarded to ``add_nodes``.
    :param enable_nfs: when truthy, one NFS node is added after the workers.
    :param rollback: when truthy, the cluster is deleted again if one of the
        cluster/node creation errors handled below occurs.
    """

    def report_failure(error):
        """Mark the task as failed using the description/trace of *error*."""
        error_obj = error_to_json(error)
        stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])  # noqa: E501
        self._update_task(
            TaskStatus.ERROR,
            error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
            stack_trace=stack_trace)

    try:
        # The org/VDC lookup lives inside the ``try`` so that a failed lookup
        # still marks the task as ERROR and still reaches the ``finally``.
        org = vcd_utils.get_org(self.tenant_client, org_name=org_name)
        vdc = vcd_utils.get_vdc(
            self.tenant_client, vdc_name=ovdc_name, org=org)

        LOGGER.debug(f"About to create cluster {cluster_name} on {ovdc_name}"
                     f" with {num_workers} worker nodes, "
                     f"storage profile={storage_profile_name}")

        self._update_task(
            TaskStatus.RUNNING,
            message=f"Creating cluster vApp {cluster_name}({cluster_id})")
        try:
            vapp_resource = \
                vdc.create_vapp(cluster_name,
                                description=f"cluster {cluster_name}",
                                network=network_name,
                                fence_mode='bridged')
        except Exception as e:
            msg = f"Error while creating vApp: {e}"
            LOGGER.debug(str(e))
            raise ClusterOperationError(msg) from e
        self.tenant_client.get_task_monitor().wait_for_status(vapp_resource.Tasks.Task[0])  # noqa: E501

        template = get_template(template_name, template_revision)

        tags = {
            ClusterMetadataKey.CLUSTER_ID: cluster_id,
            ClusterMetadataKey.CSE_VERSION: pkg_resources.require('container-service-extension')[0].version,  # noqa: E501
            ClusterMetadataKey.TEMPLATE_NAME: template[LocalTemplateKey.NAME],  # noqa: E501
            ClusterMetadataKey.TEMPLATE_REVISION: template[LocalTemplateKey.REVISION]  # noqa: E501
        }
        vapp = VApp(self.tenant_client, href=vapp_resource.get('href'))
        task = vapp.set_multiple_metadata(tags)
        self.tenant_client.get_task_monitor().wait_for_status(task)

        self._update_task(
            TaskStatus.RUNNING,
            message=f"Creating master node for "
                    f"{cluster_name} ({cluster_id})")
        vapp.reload()
        server_config = utils.get_server_runtime_config()
        catalog_name = server_config['broker']['catalog']
        try:
            add_nodes(client=self.tenant_client,
                      num_nodes=1,
                      node_type=NodeType.MASTER,
                      org=org,
                      vdc=vdc,
                      vapp=vapp,
                      catalog_name=catalog_name,
                      template=template,
                      network_name=network_name,
                      num_cpu=num_cpu,
                      memory_in_mb=mb_memory,
                      storage_profile=storage_profile_name,
                      ssh_key_filepath=ssh_key_filepath)
        except Exception as e:
            raise MasterNodeCreationError("Error adding master node:",
                                          str(e)) from e

        self._update_task(
            TaskStatus.RUNNING,
            message=f"Initializing cluster {cluster_name} ({cluster_id})")
        vapp.reload()
        init_cluster(vapp, template[LocalTemplateKey.NAME],
                     template[LocalTemplateKey.REVISION])
        master_ip = get_master_ip(vapp)
        task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                 master_ip)
        self.tenant_client.get_task_monitor().wait_for_status(task)

        self._update_task(
            TaskStatus.RUNNING,
            message=f"Creating {num_workers} node(s) for "
                    f"{cluster_name}({cluster_id})")
        try:
            add_nodes(client=self.tenant_client,
                      num_nodes=num_workers,
                      node_type=NodeType.WORKER,
                      org=org,
                      vdc=vdc,
                      vapp=vapp,
                      catalog_name=catalog_name,
                      template=template,
                      network_name=network_name,
                      num_cpu=num_cpu,
                      memory_in_mb=mb_memory,
                      storage_profile=storage_profile_name,
                      ssh_key_filepath=ssh_key_filepath)
        except Exception as e:
            raise WorkerNodeCreationError("Error creating worker node:",
                                          str(e)) from e

        self._update_task(
            TaskStatus.RUNNING,
            message=f"Adding {num_workers} node(s) to "
                    f"{cluster_name}({cluster_id})")
        vapp.reload()
        join_cluster(vapp, template[LocalTemplateKey.NAME],
                     template[LocalTemplateKey.REVISION])

        if enable_nfs:
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating NFS node for "
                        f"{cluster_name} ({cluster_id})")
            try:
                add_nodes(client=self.tenant_client,
                          num_nodes=1,
                          node_type=NodeType.NFS,
                          org=org,
                          vdc=vdc,
                          vapp=vapp,
                          catalog_name=catalog_name,
                          template=template,
                          network_name=network_name,
                          num_cpu=num_cpu,
                          memory_in_mb=mb_memory,
                          storage_profile=storage_profile_name,
                          ssh_key_filepath=ssh_key_filepath)
            except Exception as e:
                raise NFSNodeCreationError("Error creating NFS node:",
                                           str(e)) from e

        self._update_task(
            TaskStatus.SUCCESS,
            message=f"Created cluster {cluster_name} ({cluster_id})")
    except (MasterNodeCreationError, WorkerNodeCreationError,
            NFSNodeCreationError, ClusterJoiningError,
            ClusterInitializationError, ClusterOperationError) as e:
        if rollback:
            msg = f"Error creating cluster {cluster_name}. " \
                  f"Deleting cluster (rollback=True)"
            self._update_task(TaskStatus.RUNNING, message=msg)
            LOGGER.info(msg)
            try:
                cluster = get_cluster(self.tenant_client,
                                      cluster_name,
                                      cluster_id=cluster_id,
                                      org_name=org_name,
                                      ovdc_name=ovdc_name)
                self._delete_cluster(cluster_name=cluster_name,
                                     cluster_vdc_href=cluster['vdc_href'])
            except Exception:
                # Rollback is best effort: the original creation error is
                # still the one reported on the task below.
                LOGGER.error(f"Failed to delete cluster {cluster_name}",
                             exc_info=True)
        LOGGER.error(f"Error creating cluster {cluster_name}",
                     exc_info=True)
        report_failure(e)
        # raising an exception here prints a stacktrace to server console
    except Exception as e:
        LOGGER.error(f"Unknown error creating cluster {cluster_name}",
                     exc_info=True)
        report_failure(e)
    finally:
        self.logout_sys_admin_client()
def create_cluster_thread(self):
    """Create the cluster vApp described by ``self.req_spec``.

    Creates the vApp, tags it with cluster metadata, adds and initializes the
    master node, then adds worker nodes and an NFS node when requested.
    Progress is reported through ``self._update_task``.

    Every failure is logged and recorded on the task as
    ``TaskStatus.ERROR``. The cluster/node creation errors listed in the
    first ``except`` clause are additionally re-raised to the caller; any
    other ``Exception`` is not. ``self._disconnect_sys_admin()`` always runs
    at the end.
    """

    def report_failure(error):
        """Log the active traceback and mark the task as failed."""
        LOGGER.error(traceback.format_exc())
        error_obj = error_to_json(error)
        stack_trace = \
            ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])
        self._update_task(
            TaskStatus.ERROR,
            error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
            stack_trace=stack_trace)

    network_name = self.req_spec.get(RequestKey.NETWORK_NAME)
    try:
        clusters = load_from_metadata(self.tenant_client,
                                      name=self.cluster_name)
        if len(clusters) != 0:
            raise ClusterAlreadyExistsError(f"Cluster {self.cluster_name} "
                                            "already exists.")
        org_resource = self.tenant_client.get_org_by_name(
            self.req_spec.get(RequestKey.ORG_NAME))
        org = Org(self.tenant_client, resource=org_resource)
        vdc_resource = org.get_vdc(self.req_spec.get(RequestKey.OVDC_NAME))
        vdc = VDC(self.tenant_client, resource=vdc_resource)
        template = self._get_template()
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Creating cluster vApp {self.cluster_name}"
                    f"({self.cluster_id})")
        try:
            vapp_resource = vdc.create_vapp(
                self.cluster_name,
                description=f"cluster {self.cluster_name}",
                network=network_name,
                fence_mode='bridged')
        except Exception as e:
            # Chain explicitly so the underlying failure stays attached.
            raise ClusterOperationError(
                "Error while creating vApp:", str(e)) from e
        self.tenant_client.get_task_monitor().wait_for_status(
            vapp_resource.Tasks.Task[0])
        tags = {
            'cse.cluster.id': self.cluster_id,
            'cse.version': pkg_resources.require(
                'container-service-extension')[0].version,
            'cse.template': template['name'],
        }
        vapp = VApp(self.tenant_client, href=vapp_resource.get('href'))
        for k, v in tags.items():
            # Wait for each metadata update before issuing the next one.
            task = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
            self.tenant_client.get_task_monitor().wait_for_status(task)
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Creating master node for {self.cluster_name}"
                    f"({self.cluster_id})")
        vapp.reload()
        server_config = get_server_runtime_config()
        try:
            add_nodes(1, template, NodeType.MASTER, server_config,
                      self.tenant_client, org, vdc, vapp, self.req_spec)
        except Exception as e:
            raise MasterNodeCreationError(
                "Error while adding master node:", str(e)) from e
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Initializing cluster {self.cluster_name}"
                    f"({self.cluster_id})")
        vapp.reload()
        init_cluster(server_config, vapp, template)
        master_ip = get_master_ip(server_config, vapp, template)
        task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                 master_ip)
        self.tenant_client.get_task_monitor().wait_for_status(task)
        # Read once; the same value drives the check, the node creation and
        # both progress messages.
        num_workers = self.req_spec.get(RequestKey.NUM_WORKERS)
        if num_workers > 0:
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating "
                        f"{num_workers} "
                        f"node(s) for "
                        f"{self.cluster_name}({self.cluster_id})")
            try:
                add_nodes(num_workers, template, NodeType.WORKER,
                          server_config, self.tenant_client, org, vdc,
                          vapp, self.req_spec)
            except Exception as e:
                raise WorkerNodeCreationError(
                    "Error while creating worker node:", str(e)) from e
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Adding "
                        f"{num_workers} "
                        f"node(s) to "
                        f"{self.cluster_name}({self.cluster_id})")
            vapp.reload()
            join_cluster(server_config, vapp, template)
        if self.req_spec.get(RequestKey.ENABLE_NFS):
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating NFS node for {self.cluster_name}"
                        f"({self.cluster_id})")
            try:
                add_nodes(1, template, NodeType.NFS, server_config,
                          self.tenant_client, org, vdc, vapp,
                          self.req_spec)
            except Exception as e:
                raise NFSNodeCreationError(
                    "Error while creating NFS node:", str(e)) from e
        self._update_task(
            TaskStatus.SUCCESS,
            message=f"Created cluster {self.cluster_name}"
                    f"({self.cluster_id})")
    except (MasterNodeCreationError, WorkerNodeCreationError,
            NFSNodeCreationError, ClusterJoiningError,
            ClusterInitializationError, ClusterOperationError) as e:
        report_failure(e)
        # Bare ``raise`` re-raises the same exception for the caller.
        raise
    except Exception as e:
        report_failure(e)
    finally:
        self._disconnect_sys_admin()
def create_cluster_thread(self):
    """Create the cluster vApp described by ``self.body``.

    Reads 'network', 'vdc', 'node_count' and 'enable_nfs' from
    ``self.body``. Creates the vApp, tags it with cluster metadata, adds and
    initializes the master node, then adds worker nodes and an NFS node when
    requested. Progress is reported through ``self.update_task``.

    Every failure is logged and recorded on the task as
    ``TaskStatus.ERROR``. The cluster/node creation errors listed in the
    first ``except`` clause are additionally re-raised to the caller; any
    other ``Exception`` is not. ``self._disconnect_sys_admin()`` always runs
    at the end.
    """

    def report_failure(error):
        """Log the active traceback and mark the task as failed."""
        LOGGER.error(traceback.format_exc())
        error_obj = error_to_json(error)
        stack_trace = ''.join(error_obj[ERROR_MESSAGE][ERROR_STACKTRACE])
        self.update_task(
            TaskStatus.ERROR,
            error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION],
            stack_trace=stack_trace)

    try:
        # Read inside the ``try`` so that a request without a 'network'
        # entry is reported on the task and still reaches the ``finally``.
        network_name = self.body['network']
        clusters = load_from_metadata(self.tenant_client,
                                      name=self.cluster_name)
        if len(clusters) != 0:
            raise ClusterAlreadyExistsError(f"Cluster {self.cluster_name} "
                                            "already exists.")
        org_resource = self.tenant_client.get_org()
        org = Org(self.tenant_client, resource=org_resource)
        vdc_resource = org.get_vdc(self.body['vdc'])
        vdc = VDC(self.tenant_client, resource=vdc_resource)
        template = self.get_template()
        self.update_task(TaskStatus.RUNNING,
                         message='Creating cluster vApp %s(%s)' %
                         (self.cluster_name, self.cluster_id))
        try:
            vapp_resource = vdc.create_vapp(
                self.cluster_name,
                description='cluster %s' % self.cluster_name,
                network=network_name,
                fence_mode='bridged')
        except Exception as e:
            # Chain explicitly so the underlying failure stays attached.
            raise ClusterOperationError('Error while creating vApp:',
                                        str(e)) from e
        self.tenant_client.get_task_monitor().wait_for_status(
            vapp_resource.Tasks.Task[0])
        tags = {
            'cse.cluster.id': self.cluster_id,
            'cse.version': pkg_resources.require(
                'container-service-extension')[0].version,
            'cse.template': template['name'],
        }
        vapp = VApp(self.tenant_client, href=vapp_resource.get('href'))
        for k, v in tags.items():
            # Wait for each metadata update before issuing the next one.
            task = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
            self.tenant_client.get_task_monitor().wait_for_status(task)
        self.update_task(TaskStatus.RUNNING,
                         message='Creating master node for %s(%s)' %
                         (self.cluster_name, self.cluster_id))
        vapp.reload()
        server_config = get_server_runtime_config()
        try:
            add_nodes(1, template, TYPE_MASTER, server_config,
                      self.tenant_client, org, vdc, vapp, self.body)
        except Exception as e:
            raise MasterNodeCreationError(
                "Error while adding master node:", str(e)) from e
        self.update_task(TaskStatus.RUNNING,
                         message='Initializing cluster %s(%s)' %
                         (self.cluster_name, self.cluster_id))
        vapp.reload()
        init_cluster(server_config, vapp, template)
        master_ip = get_master_ip(server_config, vapp, template)
        task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                 master_ip)
        self.tenant_client.get_task_monitor().wait_for_status(task)
        # Read once; the same value drives the check, the node creation and
        # both progress messages.
        node_count = self.body['node_count']
        if node_count > 0:
            self.update_task(TaskStatus.RUNNING,
                             message='Creating %s node(s) for %s(%s)' %
                             (node_count, self.cluster_name,
                              self.cluster_id))
            try:
                add_nodes(node_count, template, TYPE_NODE, server_config,
                          self.tenant_client, org, vdc, vapp, self.body)
            except Exception as e:
                raise WorkerNodeCreationError(
                    "Error while creating worker node:", str(e)) from e
            self.update_task(TaskStatus.RUNNING,
                             message='Adding %s node(s) to %s(%s)' %
                             (node_count, self.cluster_name,
                              self.cluster_id))
            vapp.reload()
            join_cluster(server_config, vapp, template)
        if self.body['enable_nfs']:
            self.update_task(TaskStatus.RUNNING,
                             message='Creating NFS node for %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            try:
                add_nodes(1, template, TYPE_NFS, server_config,
                          self.tenant_client, org, vdc, vapp, self.body)
            except Exception as e:
                raise NFSNodeCreationError(
                    "Error while creating NFS node:", str(e)) from e
        self.update_task(TaskStatus.SUCCESS,
                         message='Created cluster %s(%s)' %
                         (self.cluster_name, self.cluster_id))
    except (MasterNodeCreationError, WorkerNodeCreationError,
            NFSNodeCreationError, ClusterJoiningError,
            ClusterInitializationError, ClusterOperationError) as e:
        report_failure(e)
        # Bare ``raise`` re-raises the same exception for the caller.
        raise
    except Exception as e:
        report_failure(e)
    finally:
        self._disconnect_sys_admin()