def run(self):
    """Import an existing storage cluster (gluster or ceph) into Tendrl.

    Driven entirely by ``self.parameters``; expects at least
    'TendrlContext.integration_id', 'DetectedCluster.sds_pkg_name',
    'Node[]', 'job_id' and 'flow_id'.

    On the master node (``len(Node[]) > 1``) a child ImportCluster job is
    spawned per peer node, then this flow polls until every node has
    registered under the cluster.  Raises AtomExecutionFailedError when
    SSH setup fails and FlowExecutionFailedError when the detected sds
    version is below the configured minimum.  Node locks acquired on
    entry are always released in the ``finally`` block.
    """
    try:
        # Lock nodes
        create_cluster_utils.acquire_node_lock(self.parameters)
        integration_id = self.parameters['TendrlContext.integration_id']
        sds_name = self.parameters['DetectedCluster.sds_pkg_name']

        # Provisioner bootstrap is only needed on a fresh import, not
        # when this import was chained after an expand/create flow.
        if not self.parameters.get('import_after_expand', False) and \
                not self.parameters.get('import_after_create', False):

            # check if gdeploy in already provisioned in this cluster
            # if no it has to be provisioned here
            if sds_name.find("gluster") > -1 and \
                    not self.parameters.get("gdeploy_provisioned", False) and \
                    not self._probe_and_mark_provisioner(
                        self.parameters["Node[]"], integration_id
                    ):
                create_cluster_utils.install_gdeploy()
                create_cluster_utils.install_python_gdeploy()
                ssh_job_ids = \
                    create_cluster_utils.gluster_create_ssh_setup_jobs(
                        self.parameters)

                # Poll the queued ssh setup jobs until all of them
                # finish (or any one of them fails).
                while True:
                    gevent.sleep(3)
                    all_status = {}
                    for job_id in ssh_job_ids:
                        all_status[job_id] = NS._int.client.read(
                            "/queue/%s/status" % job_id).value

                    _failed = {_jid: status
                               for _jid, status in all_status.iteritems()
                               if status == "failed"}
                    if _failed:
                        raise AtomExecutionFailedError(
                            "SSH setup failed for jobs %s cluster %s" %
                            (str(_failed), integration_id))

                    if all([status == "finished"
                            for status in all_status.values()]):
                        Event(
                            Message(
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message": "SSH setup completed for all nodes in cluster %s" % integration_id
                                }))

                        # set this node as gluster provisioner
                        tags = ["provisioner/%s" % integration_id]
                        NS.node_context = NS.node_context.load()
                        tags += NS.node_context.tags
                        NS.node_context.tags = list(set(tags))
                        NS.node_context.save()

                        # set gdeploy_provisioned to true so that no other nodes
                        # tries to configure gdeploy
                        self.parameters['gdeploy_provisioned'] = True
                        break

        # Record the detected cluster's identity into this node's
        # TendrlContext so the node is registered with the cluster.
        NS.tendrl_context = NS.tendrl_context.load()
        NS.tendrl_context.integration_id = integration_id
        _detected_cluster = NS.tendrl.objects.DetectedCluster().load()
        NS.tendrl_context.cluster_id = _detected_cluster.detected_cluster_id
        NS.tendrl_context.cluster_name = \
            _detected_cluster.detected_cluster_name
        NS.tendrl_context.sds_name = _detected_cluster.sds_pkg_name
        NS.tendrl_context.sds_version = _detected_cluster.sds_pkg_version
        NS.tendrl_context.save()
        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Registered Node %s with cluster %s" %
                                   (NS.node_context.node_id,
                                    NS.tendrl_context.integration_id)
                    }))

        node_list = self.parameters['Node[]']
        cluster_nodes = []
        if len(node_list) > 1:
            # This is the master node for this flow
            for node in node_list:
                if NS.node_context.node_id != node:
                    new_params = self.parameters.copy()
                    new_params['Node[]'] = [node]
                    # create same flow for each node in node list except $this
                    payload = {"tags": ["tendrl/node_%s" % node],
                               "run": "tendrl.flows.ImportCluster",
                               "status": "new",
                               "parameters": new_params,
                               "parent": self.parameters['job_id'],
                               "type": "node"
                               }
                    _job_id = str(uuid.uuid4())
                    cluster_nodes.append(_job_id)
                    Job(job_id=_job_id,
                        status="new",
                        payload=payload).save()
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Importing (job: %s) Node %s to cluster %s" % (_job_id, node, integration_id)
                            }))

        if "ceph" in sds_name.lower():
            # Only a node tagged as ceph monitor drives the ceph import.
            node_context = NS.node_context.load()
            is_mon = False
            for tag in node_context.tags:
                mon_tag = NS.compiled_definitions.get_parsed_defs(
                )['namespace.tendrl']['tags']['ceph-mon']
                if mon_tag in tag:
                    is_mon = True
            if is_mon:
                # Check if minimum required version of underlying ceph
                # cluster met. If not fail the import task
                detected_cluster = NS.tendrl.objects.DetectedCluster(
                ).load()
                detected_cluster_ver = \
                    detected_cluster.sds_pkg_version.split('.')
                maj_ver = detected_cluster_ver[0]
                min_ver = detected_cluster_ver[1]
                reqd_ceph_ver = NS.compiled_definitions.get_parsed_defs(
                )['namespace.tendrl']['min_reqd_ceph_ver']
                req_maj_ver, req_min_ver, req_rel = reqd_ceph_ver.split('.')
                Event(
                    Message(
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Check: Minimum required version (%s.%s.%s) of Ceph Storage" % (req_maj_ver, req_min_ver, req_rel)
                        }))
                # FIX: compare the minor version only when the major
                # versions are equal.  The previous check
                # (maj < req_maj or min < req_min) wrongly rejected a
                # newer major with a smaller minor, e.g. detected 3.0
                # against required 2.5.
                if int(maj_ver) < int(req_maj_ver) or \
                        (int(maj_ver) == int(req_maj_ver) and
                         int(min_ver) < int(req_min_ver)):
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="error",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Error: Minimum required version (%s.%s.%s) "
                                           "doesnt match that of detected Ceph Storage (%s.%s.%s)" %
                                           (req_maj_ver, req_min_ver,
                                            req_rel, maj_ver, min_ver, 0)
                            }))
                    raise FlowExecutionFailedError(
                        "Detected ceph version: %s"
                        " is lesser than required version: %s" %
                        (detected_cluster.sds_pkg_version,
                         reqd_ceph_ver))
                import_ceph(self.parameters)
        else:
            # Check if minimum required version of underlying gluster
            # cluster met. If not fail the import task
            detected_cluster = NS.tendrl.objects.DetectedCluster().load()
            detected_cluster_ver = \
                detected_cluster.sds_pkg_version.split('.')
            maj_ver = detected_cluster_ver[0]
            min_ver = detected_cluster_ver[1]
            reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs(
            )['namespace.tendrl']['min_reqd_gluster_ver']
            req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
            Event(
                Message(
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Check: Minimum required version (%s.%s.%s) of Gluster Storage" % (req_maj_ver, req_min_ver, req_rel)
                    }))
            # FIX: same major/minor precedence fix as the ceph branch
            # above (minor only decides when the majors are equal).
            if int(maj_ver) < int(req_maj_ver) or \
                    (int(maj_ver) == int(req_maj_ver) and
                     int(min_ver) < int(req_min_ver)):
                Event(
                    Message(
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="error",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Error: Minimum required version (%s.%s.%s) "
                                       "doesnt match that of detected Gluster Storage (%s.%s.%s)" %
                                       (req_maj_ver, req_min_ver, req_rel,
                                        maj_ver, min_ver, 0)
                        }))
                raise FlowExecutionFailedError(
                    "Detected gluster version: %s"
                    " is lesser than required version: %s" %
                    (detected_cluster.sds_pkg_version,
                     reqd_gluster_ver))
            import_gluster(self.parameters)

        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Waiting for participant nodes %s to be "
                                   "imported %s" % (node_list,
                                                    integration_id)
                    }))

        # An import is sucessfull once all Node[] register to
        # /clusters/:integration_id/nodes/:node_id
        while True:
            _all_node_status = []
            gevent.sleep(3)
            for node_id in self.parameters['Node[]']:
                _status = NS.tendrl.objects.ClusterNodeContext(
                    node_id=node_id).exists() \
                    and NS.tendrl.objects.ClusterTendrlContext(
                        integration_id=integration_id
                    ).exists()
                _all_node_status.append(_status)
            if _all_node_status:
                if all(_all_node_status):
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Import Cluster completed for all nodes "
                                           "in cluster %s" % integration_id
                            }))
                    break

        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Sucessfully imported cluster %s" %
                                   integration_id
                    }))
    except Exception as ex:
        # For traceback
        Event(
            ExceptionMessage(priority="error",
                             publisher=NS.publisher_id,
                             payload={"message": ex.message,
                                      "exception": ex}))
        # raising exception to mark job as failed
        raise ex
    finally:
        # release lock
        create_cluster_utils.release_node_lock(self.parameters)
    return True
def run(self):
    """Create a new SDS cluster (ceph or gluster) on the given nodes.

    Reads 'TendrlContext.integration_id' and 'TendrlContext.sds_name'
    from self.parameters, sets up passwordless ssh across the cluster
    nodes, then invokes the sds specific create helper
    (create_ceph / create_gluster).  Node locks are taken on entry and
    always released in the ``finally`` block; any exception is logged
    via ExceptionMessage and re-raised so the job is marked failed.
    """
    try:
        # Locking nodes
        create_cluster_utils.acquire_node_lock(self.parameters)
        integration_id = self.parameters['TendrlContext.integration_id']
        sds_name = self.parameters["TendrlContext.sds_name"]

        # Queue ssh setup jobs on the participant nodes; the helper
        # used depends on the sds type being created.
        ssh_job_ids = []
        if "ceph" in sds_name:
            ssh_job_ids = create_cluster_utils.ceph_create_ssh_setup_jobs(
                self.parameters)
        else:
            ssh_job_ids = \
                create_cluster_utils.gluster_create_ssh_setup_jobs(
                    self.parameters
                )

        # Poll the queued ssh setup jobs (status kept under
        # /queue/<job_id>/status in etcd) until all finish; abort the
        # flow if any of them fails.
        while True:
            gevent.sleep(3)
            all_status = {}
            for job_id in ssh_job_ids:
                # noinspection PyUnresolvedReferences
                all_status[job_id] = NS._int.client.read(
                    "/queue/%s/status" % job_id).value

            _failed = {_jid: status
                       for _jid, status in all_status.iteritems()
                       if status == "failed"}
            if _failed:
                raise AtomExecutionFailedError(
                    "SSH setup failed for jobs %s cluster %s" %
                    (str(_failed), integration_id))

            if all(
                [status == "finished" for status in all_status.values()]):
                Event(
                    Message(job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "SSH setup completed for all "
                                           "nodes in cluster %s" %
                                           integration_id
                            }))

                # set this node as gluster provisioner
                if "gluster" in self.parameters["TendrlContext.sds_name"]:
                    tags = ["provisioner/%s" % integration_id]
                    NS.node_context = NS.node_context.load()
                    tags += NS.node_context.tags
                    NS.node_context.tags = list(set(tags))
                    NS.node_context.save()
                break

        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Starting SDS install and config %s" %
                                   integration_id
                    }))

        # SSH setup jobs finished above, now install sds bits and create
        # cluster
        if "ceph" in sds_name:
            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Creating Ceph Storage Cluster "
                                       "%s" % integration_id
                        }))
            self.parameters.update({'create_mon_secret': True})
            create_ceph(self.parameters)
        else:
            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Creating Gluster Storage "
                                       "Cluster %s" % integration_id
                        }))
            create_gluster(self.parameters)
    except Exception as ex:
        # For traceback
        Event(
            ExceptionMessage(priority="error",
                             publisher=NS.publisher_id,
                             payload={"message": ex.message,
                                      "exception": ex}))
        # raising exception to mark job as failed
        raise ex
    finally:
        # releasing nodes if any exception came
        create_cluster_utils.release_node_lock(self.parameters)
    return True
def run(self):
    """Import an existing gluster cluster into Tendrl.

    On the master node (``len(Node[]) > 1``) a child ImportCluster job
    is created for every peer node and this flow waits - bounded by
    (nodes - 1) * 6 minutes - for those child jobs to finish.  The
    import is aborted with FlowExecutionFailedError if the detected
    gluster version is older than the configured minimum.  Node locks
    are taken on entry and always released in the ``finally`` block.
    Returns True on success, False on timeout or when import_gluster
    reports failure.
    """
    try:
        integration_id = self.parameters['TendrlContext.integration_id']
        # Lock nodes
        create_cluster_utils.acquire_node_lock(self.parameters)
        NS.tendrl_context = NS.tendrl_context.load()

        # TODO(team) when Tendrl supports create/expand/shrink cluster
        # setup passwordless ssh for all gluster nodes with given
        # integration_id (check
        # /indexes/tags/tendrl/integration/$integration_id for list of
        # nodes in cluster

        node_list = self.parameters['Node[]']
        cluster_nodes = []
        if len(node_list) > 1:
            # This is the master node for this flow
            for node in node_list:
                if NS.node_context.node_id != node:
                    new_params = self.parameters.copy()
                    new_params['Node[]'] = [node]
                    # create same flow for each node in node list except
                    # $this
                    payload = {"tags": ["tendrl/node_%s" % node],
                               "run": "tendrl.flows.ImportCluster",
                               "status": "new",
                               "parameters": new_params,
                               "parent": self.parameters['job_id'],
                               "type": "node"
                               }
                    _job_id = str(uuid.uuid4())
                    cluster_nodes.append(_job_id)
                    Job(job_id=_job_id,
                        status="new",
                        payload=payload).save()
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Importing (job: %s) Node %s "
                                           "to cluster %s" %
                                           (_job_id, node, integration_id)
                            }
                        )
                    )

        # Check if minimum required version of underlying gluster
        # cluster met. If not fail the import task
        cluster_ver = \
            NS.tendrl_context.sds_version.split('.')
        maj_ver = cluster_ver[0]
        # the minor component may carry a non-numeric suffix; keep only
        # the leading digits
        min_ver = re.findall(r'\d+', cluster_ver[1])[0]
        reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs()[
            'namespace.tendrl'
        ]['min_reqd_gluster_ver']
        req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
        Event(
            Message(
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message": "Check: Minimum required version ("
                               "%s.%s.%s) of Gluster Storage" %
                               (req_maj_ver, req_min_ver, req_rel)
                }
            )
        )
        # Version gate: fail only when the major version is lower, or
        # when the majors match and the minor version is lower.
        ver_check_failed = False
        if int(maj_ver) < int(req_maj_ver):
            ver_check_failed = True
        else:
            if int(maj_ver) == int(req_maj_ver) and \
                    int(min_ver) < int(req_min_ver):
                ver_check_failed = True

        if ver_check_failed:
            Event(
                Message(
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Error: Minimum required version "
                                   "(%s.%s.%s) "
                                   "doesnt match that of detected Gluster "
                                   "Storage (%s.%s.%s)" %
                                   (req_maj_ver, req_min_ver, req_rel,
                                    maj_ver, min_ver, 0)
                    }
                )
            )
            raise FlowExecutionFailedError(
                "Detected gluster version: %s"
                " is lesser than required version: %s" %
                (
                    NS.tendrl_context.sds_version,
                    reqd_gluster_ver
                )
            )
        if not import_gluster(self.parameters):
            return False

        if len(node_list) > 1:
            Event(
                Message(
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Waiting for participant nodes %s to "
                                   "be "
                                   "imported %s" %
                                   (node_list, integration_id)
                    }
                )
            )
            loop_count = 0
            # Wait for (no of nodes) * 6 minutes for import to complete
            wait_count = (len(node_list) - 1) * 36
            while True:
                parent_job = Job(job_id=self.parameters['job_id']).load()
                if loop_count >= wait_count:
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Import jobs not yet complete "
                                           "on all nodes. Timing out. (%s, %s)" %
                                           (str(node_list), integration_id)
                            }
                        )
                    )
                    return False
                time.sleep(10)
                # A single unfinished child job keeps us waiting.
                finished = True
                for child_job_id in parent_job.children:
                    child_job = Job(job_id=child_job_id).load()
                    if child_job.status != "finished":
                        finished = False
                        break
                if finished:
                    break
                else:
                    loop_count += 1
                    continue
    except Exception as ex:
        # For traceback
        Event(
            ExceptionMessage(
                priority="error",
                publisher=NS.publisher_id,
                payload={"message": ex.message,
                         "exception": ex}
            )
        )
        # raising exception to mark job as failed
        raise ex
    finally:
        # release lock
        create_cluster_utils.release_node_lock(self.parameters)
    return True
def run(self):
    """Expand an existing cluster with the nodes listed in 'Node[]'.

    Validates 'TendrlContext.integration_id' and that
    'TendrlContext.sds_name' is among the supported sds types, sets up
    passwordless ssh on the new nodes, runs the sds specific expand
    helper, waits until every new node's DetectedCluster data appears
    in etcd, and finally queues an ImportCluster job (with
    import_after_expand=True) covering the expanded node set.  Node
    locks are taken on entry and released in the ``finally`` block.
    """
    try:
        # Lock nodes
        create_cluster_utils.acquire_node_lock(self.parameters)
        integration_id = self.parameters['TendrlContext.integration_id']
        if integration_id is None:
            raise FlowExecutionFailedError(
                "TendrlContext.integration_id cannot be empty")

        # Only sds types listed in the compiled definitions can be
        # expanded.
        supported_sds = NS.compiled_definitions.get_parsed_defs(
        )['namespace.tendrl']['supported_sds']
        sds_name = self.parameters["TendrlContext.sds_name"]
        if sds_name not in supported_sds:
            raise FlowExecutionFailedError("SDS (%s) not supported" %
                                           sds_name)

        ssh_job_ids = []
        if "ceph" in sds_name:
            ssh_job_ids = create_cluster_utils.ceph_create_ssh_setup_jobs(
                self.parameters)
        else:
            # current node is already part of the cluster, so ssh setup
            # is skipped for it
            ssh_job_ids = \
                create_cluster_utils.gluster_create_ssh_setup_jobs(
                    self.parameters,
                    skip_current_node=True
                )

        # Poll the ssh setup jobs until all finish (or any one fails).
        while True:
            time.sleep(3)
            all_status = {}
            for job_id in ssh_job_ids:
                all_status[job_id] = NS._int.client.read(
                    "/queue/%s/status" % job_id).value

            _failed = {_jid: status
                       for _jid, status in all_status.iteritems()
                       if status == "failed"}
            if _failed:
                raise FlowExecutionFailedError(
                    "SSH setup failed for jobs %s cluster %s" %
                    (str(_failed), integration_id))

            if all(
                [status == "finished" for status in all_status.values()]):
                Event(
                    Message(job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "SSH setup completed for all "
                                           "nodes in cluster %s" %
                                           integration_id
                            }))
                break

        # SSH setup jobs finished above, now install sds
        # bits and create cluster
        if "ceph" in sds_name:
            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Expanding ceph cluster %s" %
                                       integration_id
                        }))
            ceph_help.expand_cluster(self.parameters)
        else:
            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Expanding Gluster Storage"
                                       " Cluster %s" % integration_id
                        }))
            gluster_help.expand_gluster(self.parameters)
        Event(
            Message(
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message": "SDS install/config completed on newly "
                               "expanded nodes, Please wait while "
                               "tendrl-node-agents detect sds details on the newly "
                               "expanded nodes %s" % self.parameters['Node[]']
                }))

        # Wait till detected cluster in populated for nodes
        while True:
            time.sleep(3)
            all_status = []
            detected_cluster = ""
            different_cluster_id = False
            dc = ""
            for node in self.parameters['Node[]']:
                try:
                    dc = NS._int.client.read(
                        "/nodes/%s/DetectedCluster/detected_cluster_id" %
                        node).value
                    if not detected_cluster:
                        detected_cluster = dc
                    else:
                        if detected_cluster != dc:
                            # all expanded nodes must report the same
                            # physical cluster id
                            all_status.append(False)
                            different_cluster_id = True
                            break
                    all_status.append(True)
                except etcd.EtcdKeyNotFound:
                    # node-agent has not populated DetectedCluster for
                    # this node yet; keep polling
                    all_status.append(False)

            if different_cluster_id:
                raise FlowExecutionFailedError(
                    "Seeing different detected cluster id in"
                    " different nodes. %s and %s" %
                    (detected_cluster, dc))

            if all_status:
                if all(all_status):
                    break

        # Create the params list for import cluster flow
        new_params = dict()
        new_params['Node[]'] = self.parameters['Node[]']
        new_params['TendrlContext.integration_id'] = integration_id

        # Get node context for one of the nodes from list
        sds_pkg_name = NS._int.client.read(
            "nodes/%s/DetectedCluster/"
            "sds_pkg_name" % self.parameters['Node[]'][0]).value
        new_params['import_after_expand'] = True
        sds_pkg_version = NS._int.client.read(
            "nodes/%s/DetectedCluster/sds_pkg_"
            "version" % self.parameters['Node[]'][0]).value
        new_params['DetectedCluster.sds_pkg_name'] = \
            sds_pkg_name
        new_params['DetectedCluster.sds_pkg_version'] = \
            sds_pkg_version

        tags = []
        for node in self.parameters['Node[]']:
            tags.append("tendrl/node_%s" % node)
        payload = {"tags": tags,
                   "run": "tendrl.flows.ImportCluster",
                   "status": "new",
                   "parameters": new_params,
                   "parent": self.parameters['job_id'],
                   "type": "node"
                   }
        _job_id = str(uuid.uuid4())
        # release lock before import cluster
        # NOTE(review): the finally block below releases the same lock
        # again - presumably release_node_lock is idempotent; confirm
        create_cluster_utils.release_node_lock(self.parameters)
        Job(job_id=_job_id,
            status="new",
            payload=payload).save()
        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Please wait while Tendrl imports ("
                                   "job_id: %s) newly expanded "
                                   "%s storage nodes %s" %
                                   (_job_id, sds_pkg_name, integration_id)
                    }))
    except Exception as ex:
        # For traceback
        Event(
            ExceptionMessage(priority="error",
                             publisher=NS.publisher_id,
                             payload={"message": ex.message,
                                      "exception": ex}))
        # raising exception to mark job as failed
        raise ex
    finally:
        # release lock if any exception came
        create_cluster_utils.release_node_lock(self.parameters)