def test_import_gluster(): tendrlNS = init() # import pdb; pdb.set_trace(); NS.compiled_definitions = tendrlNS.current_ns.definitions parameters = maps.NamedDict(job_id=1, flow_id=1) ret_val, err = gluster_help.import_gluster(parameters) assert ret_val is False assert err is not None NS.config.data['package_source_type'] = 'pip' with patch.object(ansible_module_runner.AnsibleRunner, 'run', run): ret, err = gluster_help.import_gluster(parameters) assert ret is False with patch.object(ansible_module_runner.AnsibleRunner, 'run', return_value=({"rc": 1, "msg": None}, None)): ret, err = gluster_help.import_gluster(parameters) assert ret is False NS.config.data['package_source_type'] = 'rpm' with patch.object(ansible_module_runner.AnsibleRunner, 'run', return_value=({"rc": 0, "msg": None}, None)): with patch.object(__builtin__, 'open', open): with patch.object(cmd_utils.Command, 'run', return_value=("err", "", 1)): ret, err = gluster_help.import_gluster(parameters) assert ret is False with patch.object(ansible_module_runner.AnsibleRunner, 'run', return_value=({"rc": 0, "msg": None}, None)): with patch.object(__builtin__, 'open', open): with patch.object(cmd_utils.Command, 'run', return_value=(None, "", 0)): with patch.object(ServiceStatus, 'status', return_value=True): ret, err = gluster_help.import_gluster(parameters) assert ret is True
def test_import_gluster():
    """Exercise gluster_help.import_gluster() end to end.

    Covers: the unconfigured failure path, the 'pip' source with a
    failing ansible run (both via the ``run`` fixture and via a
    non-zero rc), and the 'rpm' source with a failing and then a fully
    successful command/service chain.
    """
    ns = init()
    NS.compiled_definitions = ns.current_ns.definitions
    params = maps.NamedDict(job_id=1, flow_id=1)

    # Without a package_source_type the import fails with an error.
    status, error = gluster_help.import_gluster(params)
    assert status is False
    assert error is not None

    # --- pip-based installation path ---
    NS.config.data['package_source_type'] = 'pip'

    # AnsibleRunner.run replaced by the failing 'run' fixture.
    with patch.object(ansible_module_runner.AnsibleRunner, 'run', run):
        status, error = gluster_help.import_gluster(params)
        assert status is False

    # AnsibleRunner.run mocked to report a non-zero return code.
    with patch.object(ansible_module_runner.AnsibleRunner, 'run',
                      return_value=({"rc": 1, "msg": None}, None)):
        status, error = gluster_help.import_gluster(params)
        assert status is False

    # --- rpm-based installation path ---
    NS.config.data['package_source_type'] = 'rpm'

    # Ansible succeeds, but the follow-up command exits non-zero.
    with patch.object(ansible_module_runner.AnsibleRunner, 'run',
                      return_value=({"rc": 0, "msg": None}, None)), \
            patch.object(__builtin__, 'open', open):
        with patch.object(cmd_utils.Command, 'run',
                          return_value=("err", "", 1)):
            status, error = gluster_help.import_gluster(params)
            assert status is False

    # Every collaborator healthy: the import finally succeeds.
    with patch.object(ansible_module_runner.AnsibleRunner, 'run',
                      return_value=({"rc": 0, "msg": None}, None)), \
            patch.object(__builtin__, 'open', open):
        with patch.object(cmd_utils.Command, 'run',
                          return_value=(None, "", 0)):
            with patch.object(ServiceStatus, 'status',
                              return_value=True):
                status, error = gluster_help.import_gluster(params)
                assert status is True
def test_import_gluster():
    """Exercise gluster_help.import_gluster() failure paths.

    NOTE(review): the first assertion compares the raw call result with
    ``is False``; if this revision of import_gluster returns a
    ``(status, err)`` tuple (as sibling test revisions assume), a tuple
    is never the False singleton and the assert would always fail —
    confirm which return contract this revision targets.
    """
    tendrlNS = init()
    NS.compiled_definitions = tendrlNS.current_ns.definitions
    parameters = maps.NamedDict(job_id=1, flow_id=1)

    # Unconfigured package source: import must fail.
    assert gluster_help.import_gluster(parameters) is False

    NS.config.data['package_source_type'] = 'pip'
    # Failing AnsibleRunner.run (the 'run' fixture) fails the import.
    with patch.object(ansible_module_runner.AnsibleRunner, 'run',
                      run) as mock_run:
        ret = gluster_help.import_gluster(parameters)
        assert ret is False
    # Successful ansible run with 'open' patched; the result is
    # deliberately not asserted here (smoke-path only).
    with patch.object(ansible_module_runner.AnsibleRunner, 'run',
                      return_value=True) as mock_run:
        with patch.object(__builtin__, 'open', open) as mock_open:
            ret = gluster_help.import_gluster(parameters)
    # A missing ansible module propagates AnsibleModuleNotFound
    # (AnsibleRunner itself replaced by the 'ansible' fixture).
    with patch.object(ansible_module_runner, 'AnsibleRunner',
                      ansible) as mock_ansible:
        with pytest.raises(ansible_module_runner.AnsibleModuleNotFound):
            ret = gluster_help.import_gluster(parameters)

    NS.config.data['package_source_type'] = 'rpm'
    # rpm path with the failing 'run' fixture also fails the import.
    with patch.object(ansible_module_runner.AnsibleRunner, 'run',
                      run) as mock_run:
        ret = gluster_help.import_gluster(parameters)
        assert ret is False
def run(self):
    """Run the ImportCluster flow atom on this node.

    Sequence (all state via the global ``NS`` namespace and
    etcd-backed objects):
      1. lock the participating nodes;
      2. for a fresh gluster import (not after expand/create), provision
         gdeploy + passwordless ssh on this node if no provisioner is
         marked yet, polling the spawned ssh-setup jobs until done;
      3. bind this node's TendrlContext to the DetectedCluster;
      4. when this job carries the full Node[] list (master), fan out a
         child ImportCluster job per peer node;
      5. after a minimum sds version check, run import_ceph() (on a
         ceph-mon node) or import_gluster();
      6. wait until every node has registered its cluster contexts.

    Returns True on success.  Raises AtomExecutionFailedError when ssh
    setup fails, FlowExecutionFailedError when the version check fails;
    any exception is logged via ExceptionMessage and re-raised so the
    job is marked failed.  The node lock is always released.
    """
    try:
        # Lock nodes
        create_cluster_utils.acquire_node_lock(self.parameters)
        integration_id = self.parameters['TendrlContext.integration_id']
        sds_name = self.parameters['DetectedCluster.sds_pkg_name']
        if not self.parameters.get('import_after_expand', False) and \
                not self.parameters.get('import_after_create', False):

            # check if gdeploy is already provisioned in this cluster
            # if not, it has to be provisioned here
            if sds_name.find("gluster") > -1 and \
                not self.parameters.get("gdeploy_provisioned", False) and \
                not self._probe_and_mark_provisioner(
                    self.parameters["Node[]"], integration_id
                ):
                create_cluster_utils.install_gdeploy()
                create_cluster_utils.install_python_gdeploy()
                ssh_job_ids = create_cluster_utils.gluster_create_ssh_setup_jobs(
                    self.parameters)

                # Poll the ssh-setup child jobs (via the etcd job queue)
                # until all finish; any failure aborts the atom.
                while True:
                    gevent.sleep(3)
                    all_status = {}
                    for job_id in ssh_job_ids:
                        all_status[job_id] = NS._int.client.read(
                            "/queue/%s/status" % job_id).value

                    _failed = {_jid: status for _jid, status in
                               all_status.iteritems() if status == "failed"}
                    if _failed:
                        raise AtomExecutionFailedError(
                            "SSH setup failed for jobs %s cluster %s" %
                            (str(_failed), integration_id))

                    if all([status == "finished" for status in
                            all_status.values()]):
                        Event(
                            Message(
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={"message": "SSH setup completed for all nodes in cluster %s" % integration_id}
                            )
                        )

                        # set this node as gluster provisioner
                        tags = ["provisioner/%s" % integration_id]
                        NS.node_context = NS.node_context.load()
                        tags += NS.node_context.tags
                        NS.node_context.tags = list(set(tags))
                        NS.node_context.save()

                        # set gdeploy_provisioned to true so that no other
                        # nodes tries to configure gdeploy
                        self.parameters['gdeploy_provisioned'] = True
                        break

        # Register this node's TendrlContext against the detected cluster.
        NS.tendrl_context = NS.tendrl_context.load()
        NS.tendrl_context.integration_id = integration_id
        _detected_cluster = NS.tendrl.objects.DetectedCluster().load()
        NS.tendrl_context.cluster_id = _detected_cluster.detected_cluster_id
        NS.tendrl_context.cluster_name = _detected_cluster.detected_cluster_name
        NS.tendrl_context.sds_name = _detected_cluster.sds_pkg_name
        NS.tendrl_context.sds_version = _detected_cluster.sds_pkg_version
        NS.tendrl_context.save()
        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={"message": "Registered Node %s with cluster %s" % (NS.node_context.node_id, NS.tendrl_context.integration_id)
                             }))

        node_list = self.parameters['Node[]']
        cluster_nodes = []
        if len(node_list) > 1:
            # This is the master node for this flow
            for node in node_list:
                if NS.node_context.node_id != node:
                    new_params = self.parameters.copy()
                    new_params['Node[]'] = [node]

                    # create same flow for each node in node list except $this
                    payload = {
                        "tags": ["tendrl/node_%s" % node],
                        "run": "tendrl.flows.ImportCluster",
                        "status": "new",
                        "parameters": new_params,
                        "parent": self.parameters['job_id'],
                        "type": "node"
                    }
                    _job_id = str(uuid.uuid4())
                    cluster_nodes.append(_job_id)
                    Job(job_id=_job_id,
                        status="new",
                        payload=payload).save()
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={"message": "Importing (job: %s) Node %s to cluster %s" % (_job_id, node, integration_id)
                                     }))

        if "ceph" in sds_name.lower():
            # Only a node tagged as ceph-mon performs the version check
            # and the actual ceph import.
            node_context = NS.node_context.load()
            is_mon = False
            for tag in node_context.tags:
                mon_tag = NS.compiled_definitions.get_parsed_defs(
                )['namespace.tendrl']['tags']['ceph-mon']
                if mon_tag in tag:
                    is_mon = True
            if is_mon:
                # Check if minimum required version of underlying ceph
                # cluster met. If not fail the import task
                detected_cluster = NS.tendrl.objects.DetectedCluster().load()
                detected_cluster_ver = detected_cluster.sds_pkg_version.split(
                    '.')
                maj_ver = detected_cluster_ver[0]
                min_ver = detected_cluster_ver[1]
                reqd_ceph_ver = NS.compiled_definitions.get_parsed_defs(
                )['namespace.tendrl']['min_reqd_ceph_ver']
                req_maj_ver, req_min_ver, req_rel = reqd_ceph_ver.split('.')
                Event(
                    Message(
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": "Check: Minimum required version (%s.%s.%s) of Ceph Storage" % (req_maj_ver, req_min_ver, req_rel)
                                 }))
                if int(maj_ver) < int(req_maj_ver) or \
                        int(min_ver) < int(req_min_ver):
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="error",
                            publisher=NS.publisher_id,
                            payload={"message": "Error: Minimum required version (%s.%s.%s) "
                                                "doesnt match that of detected Ceph Storage (%s.%s.%s)" % (req_maj_ver, req_min_ver, req_rel, maj_ver, min_ver, 0)
                                     }))
                    raise FlowExecutionFailedError(
                        "Detected ceph version: %s"
                        " is lesser than required version: %s" %
                        (detected_cluster.sds_pkg_version,
                         reqd_ceph_ver))
                import_ceph(self.parameters)
        else:
            # Check if minimum required version of underlying gluster
            # cluster met. If not fail the import task
            detected_cluster = NS.tendrl.objects.DetectedCluster().load()
            detected_cluster_ver = detected_cluster.sds_pkg_version.split('.')
            maj_ver = detected_cluster_ver[0]
            min_ver = detected_cluster_ver[1]
            reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs(
            )['namespace.tendrl']['min_reqd_gluster_ver']
            req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
            Event(
                Message(
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={"message": "Check: Minimum required version (%s.%s.%s) of Gluster Storage" % (req_maj_ver, req_min_ver, req_rel)
                             }))
            if int(maj_ver) < int(req_maj_ver) or \
                    int(min_ver) < int(req_min_ver):
                Event(
                    Message(
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="error",
                        publisher=NS.publisher_id,
                        payload={"message": "Error: Minimum required version (%s.%s.%s) "
                                            "doesnt match that of detected Gluster Storage (%s.%s.%s)" % (req_maj_ver, req_min_ver, req_rel, maj_ver, min_ver, 0)
                                 }))
                raise FlowExecutionFailedError(
                    "Detected gluster version: %s"
                    " is lesser than required version: %s" %
                    (detected_cluster.sds_pkg_version,
                     reqd_gluster_ver))
            import_gluster(self.parameters)

        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={"message": "Waiting for participant nodes %s to be "
                                        "imported %s" % (node_list, integration_id)
                             }))

        # An import is sucessfull once all Node[] register to
        # /clusters/:integration_id/nodes/:node_id
        while True:
            _all_node_status = []
            gevent.sleep(3)
            for node_id in self.parameters['Node[]']:
                _status = NS.tendrl.objects.ClusterNodeContext(node_id=node_id).exists() \
                    and NS.tendrl.objects.ClusterTendrlContext(
                        integration_id=integration_id
                    ).exists()
                _all_node_status.append(_status)
            if _all_node_status:
                if all(_all_node_status):
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={"message": "Import Cluster completed for all nodes "
                                                "in cluster %s" % integration_id
                                     }))
                    break

        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={"message": "Sucessfully imported cluster %s" % integration_id
                             }))
    except Exception as ex:
        # For traceback
        Event(
            ExceptionMessage(priority="error",
                             publisher=NS.publisher_id,
                             payload={"message": ex.message,
                                      "exception": ex}))
        # raising exception to mark job as failed
        raise ex
    finally:
        # release lock
        create_cluster_utils.release_node_lock(self.parameters)
    return True
def run(self):
    """Run the ImportCluster flow atom (gluster-only revision).

    Locks the nodes, fans out child ImportCluster jobs to peer nodes
    when this job is the master (carries the full Node[] list), parses
    the installed glusterfs-server rpm version and rejects clusters
    below the configured minimum, runs import_gluster(), then polls the
    child jobs until all finish or a timeout expires (child jobs left
    incomplete at timeout are explicitly marked failed).

    Returns True on success, False on any detected failure; unexpected
    exceptions are logged via ExceptionMessage and re-raised so the job
    is marked failed.  The node lock is always released.
    """
    try:
        integration_id = self.parameters['TendrlContext.integration_id']
        _cluster = NS.tendrl.objects.Cluster(
            integration_id=integration_id
        ).load()
        # Lock nodes
        flow_utils.acquire_node_lock(self.parameters)
        NS.tendrl_context = NS.tendrl_context.load()

        # TODO(team) when Tendrl supports create/expand/shrink cluster
        # setup passwordless ssh for all gluster nodes with given
        # integration_id (check
        # /indexes/tags/tendrl/integration/$integration_id for list of
        # nodes in cluster
        node_list = self.parameters['Node[]']
        cluster_nodes = []
        if len(node_list) > 1:
            # This is the master node for this flow
            for node in node_list:
                if NS.node_context.node_id != node:
                    new_params = self.parameters.copy()
                    new_params['Node[]'] = [node]

                    # create same flow for each node in node list except
                    # $this
                    payload = {"tags": ["tendrl/node_%s" % node],
                               "run": "tendrl.flows.ImportCluster",
                               "status": "new",
                               "parameters": new_params,
                               "parent": self.parameters['job_id'],
                               "type": "node"
                               }
                    _job_id = str(uuid.uuid4())
                    cluster_nodes.append(_job_id)
                    NS.tendrl.objects.Job(
                        job_id=_job_id,
                        status="new",
                        payload=payload
                    ).save()
                    logger.log(
                        "info",
                        NS.publisher_id,
                        {"message": "ImportCluster %s (jobID: %s) :"
                                    "importing host %s" %
                                    (_cluster.short_name, _job_id, node)},
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id']
                    )

        # Check if minimum required version of underlying gluster
        # cluster met. If not fail the import task
        # A sample output from "rpm -qa | grep glusterfs-server"
        # looks as below
        # `glusterfs-server-3.8.4-54.4.el7rhgs.x86_64`
        # In case of upstream build the format could be as below
        # `glusterfs-server-4.1dev-0.203.gitc3e1a2e.el7.centos.x86_64`
        # `glusterfs-server-3.12.8-0.0.el7.centos.x86_64.rpm`
        cmd = subprocess.Popen(
            'rpm -q glusterfs-server',
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        out, err = cmd.communicate()
        if out in [None, ""] or err:
            logger.log(
                "error",
                NS.publisher_id,
                {"message": "Failed to detect underlying cluster version"},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
            return False

        # Parse "<maj>.<min>.<rel>[-<build>]" out of the rpm name; a
        # 'dev' suffix on the minor part (upstream builds) is stripped
        # down to its first digit.
        lines = out.split('\n')
        build_no = None
        req_build_no = None
        ver_det = lines[0].split('glusterfs-server-')[-1].split('.')
        maj_ver = ver_det[0]
        min_ver = ver_det[1]
        if 'dev' in min_ver:
            min_ver = min_ver[0]
        rel = ver_det[2]
        if '-' in rel:
            build_no = rel.split('-')[-1]
            rel = rel.split('-')[0]
        reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs()[
            'namespace.tendrl'
        ]['min_reqd_gluster_ver']
        req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
        if '-' in req_rel:
            req_build_no = req_rel.split('-')[-1]
            req_rel = req_rel.split('-')[0]
        logger.log(
            "info",
            NS.publisher_id,
            {"message": "Checking minimum required version ("
                        "%s.%s.%s) of Gluster Storage" %
                        (req_maj_ver, req_min_ver, req_rel)},
            job_id=self.parameters['job_id'],
            flow_id=self.parameters['flow_id']
        )

        # Lexicographic (major, minor, release, build) comparison; the
        # build number only participates when both sides define one.
        ver_check_failed = False
        if int(maj_ver) < int(req_maj_ver):
            ver_check_failed = True
        else:
            if int(maj_ver) == int(req_maj_ver):
                if int(min_ver) < int(req_min_ver):
                    ver_check_failed = True
                else:
                    if int(min_ver) == int(req_min_ver):
                        if int(rel) < int(req_rel):
                            ver_check_failed = True
                        else:
                            if int(rel) == int(req_rel):
                                if build_no is not None and \
                                        req_build_no is not None and \
                                        int(build_no) < int(req_build_no):
                                    ver_check_failed = True

        if ver_check_failed:
            logger.log(
                "error",
                NS.publisher_id,
                {"message": "Error: Minimum required version "
                            "(%s.%s.%s) "
                            "doesnt match that of detected Gluster "
                            "Storage (%s.%s.%s)" %
                            (req_maj_ver, req_min_ver,
                             req_rel, maj_ver, min_ver, 0)},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
            return False

        ret_val, err = import_gluster(self.parameters)
        if not ret_val:
            logger.log(
                "error",
                NS.publisher_id,
                {"message": "Error importing the cluster (integration_id:"
                            " %s). Error: %s" % (integration_id, err)
                 },
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
            return False

        if len(node_list) > 1:
            logger.log(
                "info",
                NS.publisher_id,
                {"message": "ImportCluster %s waiting for hosts %s "
                            "to be imported" %
                            (_cluster.short_name, node_list)},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
            loop_count = 0
            # Wait for (no of nodes) * 6 minutes for import to complete
            wait_count = (len(node_list) - 1) * 36
            while True:
                child_jobs_failed = []
                parent_job = NS.tendrl.objects.Job(
                    job_id=self.parameters['job_id']
                ).load()
                if loop_count >= wait_count:
                    logger.log(
                        "error",
                        NS.publisher_id,
                        {"message": "Import jobs on cluster(%s) not yet "
                                    "complete on all nodes(%s). Timing out." %
                                    (_cluster.short_name, str(node_list))},
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id']
                    )
                    # Marking child jobs as failed which did not complete
                    # as the parent job has timed out. This has to be done
                    # explicitly because these jobs will still be processed
                    # by the node-agent, and will keep it busy, which might
                    # defer the new jobs or lead to their timeout.
                    for child_job_id in parent_job.children:
                        child_job = NS.tendrl.objects.Job(
                            job_id=child_job_id
                        ).load()
                        if child_job.status not in ["finished", "failed"]:
                            child_job.status = "failed"
                            child_job.save()
                    return False
                time.sleep(10)
                completed = True
                for child_job_id in parent_job.children:
                    child_job = NS.tendrl.objects.Job(
                        job_id=child_job_id
                    ).load()
                    if child_job.status not in ["finished", "failed"]:
                        completed = False
                    elif child_job.status == "failed":
                        child_jobs_failed.append(child_job.job_id)
                if completed:
                    break
                else:
                    loop_count += 1
                    continue
            if len(child_jobs_failed) > 0:
                _msg = "Child jobs failed are %s" % child_jobs_failed
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": _msg},
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id']
                )
                return False
    except Exception as ex:
        # For traceback
        Event(
            ExceptionMessage(
                priority="error",
                publisher=NS.publisher_id,
                payload={
                    "message": ex.message,
                    "exception": ex
                }
            )
        )
        # raising exception to mark job as failed
        raise ex
    finally:
        # release lock
        flow_utils.release_node_lock(self.parameters)
    return True
def run(self):
    """Run the ImportCluster flow atom (revision with volume profiling).

    Like the plain gluster import: lock nodes, fan out child jobs from
    the master node, rpm-based minimum-version check, import_gluster(),
    then wait on child jobs.  This revision additionally (a) applies the
    cluster's volume-profiling flag to all existing volumes before
    fanning out, and (b) scales the child-job wait timeout by the number
    of volumes found.

    Returns True on success, False on any detected failure; unexpected
    exceptions are logged and re-raised so the job is marked failed.
    The node lock is always released.
    """
    try:
        integration_id = self.parameters['TendrlContext.integration_id']
        _cluster = NS.tendrl.objects.Cluster(
            integration_id=integration_id
        ).load()
        # Lock nodes
        flow_utils.acquire_node_lock(self.parameters)
        NS.tendrl_context = NS.tendrl_context.load()

        # TODO(team) when Tendrl supports create/expand/shrink cluster
        # setup passwordless ssh for all gluster nodes with given
        # integration_id (check
        # /indexes/tags/tendrl/integration/$integration_id for list of
        # nodes in cluster
        node_list = self.parameters['Node[]']
        cluster_nodes = []
        if len(node_list) > 1:
            # This is the master node for this flow

            # Find number of volumes in the cluster to run profiling job
            cmd = cmd_utils.Command('gluster volume list')
            out, err, rc = cmd.run()
            if not err:
                volumes = filter(None, out.split("\n"))
                ret_val, err = enable_disable_volume_profiling(
                    volumes, self.parameters)
                if not ret_val:
                    logger.log(
                        "error",
                        NS.publisher_id,
                        {"message": "Failed to %s profiling. Error: %s" %
                         (_cluster.volume_profiling_flag, err)
                         },
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id']
                    )
                    return False
            else:
                # Best-effort: profiling setup is skipped, the import
                # itself still proceeds.
                logger.log(
                    "warning",
                    NS.publisher_id,
                    {"message": "Unable to find list of volumes in a "
                                "cluster, Skipping volume enable/disable "
                                "profiling flow. error: %s" % err},
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id']
                )
                # continue the import flow

            for node in node_list:
                if NS.node_context.node_id != node:
                    new_params = self.parameters.copy()
                    new_params['Node[]'] = [node]

                    # create same flow for each node in node list except
                    # $this
                    payload = {"tags": ["tendrl/node_%s" % node],
                               "run": "tendrl.flows.ImportCluster",
                               "status": "new",
                               "parameters": new_params,
                               "parent": self.parameters['job_id'],
                               "type": "node"
                               }
                    _job_id = str(uuid.uuid4())
                    cluster_nodes.append(_job_id)
                    NS.tendrl.objects.Job(
                        job_id=_job_id,
                        status="new",
                        payload=payload
                    ).save()
                    logger.log(
                        "info",
                        NS.publisher_id,
                        {"message": "ImportCluster %s (jobID: %s) :"
                                    "importing host %s" %
                                    (_cluster.short_name, _job_id, node)},
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id']
                    )

        # Check if minimum required version of underlying gluster
        # cluster met. If not fail the import task
        # A sample output from "rpm -qa | grep glusterfs-server"
        # looks as below
        # `glusterfs-server-3.8.4-54.4.el7rhgs.x86_64`
        # In case of upstream build the format could be as below
        # `glusterfs-server-4.1dev-0.203.gitc3e1a2e.el7.centos.x86_64`
        # `glusterfs-server-3.12.8-0.0.el7.centos.x86_64.rpm`
        cmd = subprocess.Popen(
            'rpm -q glusterfs-server',
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        out, err = cmd.communicate()
        if out in [None, ""] or err:
            logger.log(
                "error",
                NS.publisher_id,
                {"message": "Failed to detect underlying cluster version"},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
            return False

        # Parse "<maj>.<min>.<rel>[-<build>]" out of the rpm name; a
        # 'dev' suffix on the minor part (upstream builds) is stripped
        # down to its first digit.
        lines = out.split('\n')
        build_no = None
        req_build_no = None
        ver_det = lines[0].split('glusterfs-server-')[-1].split('.')
        maj_ver = ver_det[0]
        min_ver = ver_det[1]
        if 'dev' in min_ver:
            min_ver = min_ver[0]
        rel = ver_det[2]
        if '-' in rel:
            build_no = rel.split('-')[-1]
            rel = rel.split('-')[0]
        reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs()[
            'namespace.tendrl'
        ]['min_reqd_gluster_ver']
        req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
        if '-' in req_rel:
            req_build_no = req_rel.split('-')[-1]
            req_rel = req_rel.split('-')[0]
        logger.log(
            "info",
            NS.publisher_id,
            {"message": "Checking minimum required version ("
                        "%s.%s.%s) of Gluster Storage" %
                        (req_maj_ver, req_min_ver, req_rel)},
            job_id=self.parameters['job_id'],
            flow_id=self.parameters['flow_id']
        )

        # Lexicographic (major, minor, release, build) comparison; the
        # build number only participates when both sides define one.
        ver_check_failed = False
        if int(maj_ver) < int(req_maj_ver):
            ver_check_failed = True
        else:
            if int(maj_ver) == int(req_maj_ver):
                if int(min_ver) < int(req_min_ver):
                    ver_check_failed = True
                else:
                    if int(min_ver) == int(req_min_ver):
                        if int(rel) < int(req_rel):
                            ver_check_failed = True
                        else:
                            if int(rel) == int(req_rel):
                                if build_no is not None and \
                                        req_build_no is not None and \
                                        int(build_no) < int(req_build_no):
                                    ver_check_failed = True

        if ver_check_failed:
            logger.log(
                "error",
                NS.publisher_id,
                {"message": "Error: Minimum required version "
                            "(%s.%s.%s) "
                            "doesnt match that of detected Gluster "
                            "Storage (%s.%s.%s)" %
                            (req_maj_ver, req_min_ver,
                             req_rel, maj_ver, min_ver, 0)},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
            return False

        ret_val, err = import_gluster(self.parameters)
        if not ret_val:
            logger.log(
                "error",
                NS.publisher_id,
                {"message": "Error importing the cluster (integration_id:"
                            " %s). Error: %s" % (integration_id, err)
                 },
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
            return False

        if len(node_list) > 1:
            # find number of volumes in a cluster
            cmd = cmd_utils.Command('gluster volume list')
            out, err, rc = cmd.run()
            # default intervel is 8 min
            # 5 sec sleep for one count increment (480 / 5)
            # NOTE(review): the loop below sleeps 10s per increment, so
            # the effective default is 96 * 10s = 16 min — confirm which
            # of the comment or the sleep is intended.
            wait_count = 96
            if not err:
                volumes = out.split("\n")
                # 15 sec for each volume
                wait_count = wait_count + (len(volumes) * 3)
            logger.log(
                "info",
                NS.publisher_id,
                {"message": "ImportCluster %s waiting for hosts %s "
                            "to be imported" %
                            (_cluster.short_name, node_list)},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
            loop_count = 0
            # Poll child jobs until all reach finished/failed or
            # wait_count iterations elapse.
            while True:
                child_jobs_failed = []
                parent_job = NS.tendrl.objects.Job(
                    job_id=self.parameters['job_id']
                ).load()
                if loop_count >= wait_count:
                    logger.log(
                        "error",
                        NS.publisher_id,
                        {"message": "Import jobs on cluster(%s) not yet "
                                    "complete on all nodes(%s). Timing out." %
                                    (_cluster.short_name, str(node_list))},
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id']
                    )
                    # Marking child jobs as failed which did not complete
                    # as the parent job has timed out. This has to be done
                    # explicitly because these jobs will still be processed
                    # by the node-agent, and will keep it busy, which might
                    # defer the new jobs or lead to their timeout.
                    for child_job_id in parent_job.children:
                        child_job = NS.tendrl.objects.Job(
                            job_id=child_job_id
                        ).load()
                        if child_job.status not in ["finished", "failed"]:
                            child_job.status = "failed"
                            child_job.save()
                    return False
                time.sleep(10)
                completed = True
                for child_job_id in parent_job.children:
                    child_job = NS.tendrl.objects.Job(
                        job_id=child_job_id
                    ).load()
                    if child_job.status not in ["finished", "failed"]:
                        completed = False
                    elif child_job.status == "failed":
                        child_jobs_failed.append(child_job.job_id)
                if completed:
                    break
                else:
                    loop_count += 1
                    continue
            if len(child_jobs_failed) > 0:
                _msg = "Child jobs failed are %s" % child_jobs_failed
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": _msg},
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id']
                )
                return False
    except Exception as ex:
        # For traceback
        Event(
            ExceptionMessage(
                priority="error",
                publisher=NS.publisher_id,
                payload={
                    "message": ex.message,
                    "exception": ex
                }
            )
        )
        # raising exception to mark job as failed
        raise ex
    finally:
        # release lock
        flow_utils.release_node_lock(self.parameters)
    return True
def run(self):
    """Run the ImportCluster flow atom (sds_version-based revision).

    Locks the nodes, fans out child ImportCluster jobs from the master
    node, checks the already-recorded ``NS.tendrl_context.sds_version``
    against the configured minimum gluster version (major/minor only),
    runs import_gluster(), then waits for the child jobs to finish,
    timing out after (len(Node[]) - 1) * 36 polls of 10s each.

    Returns True on success, False on import failure or timeout; raises
    FlowExecutionFailedError when the version check fails.  Unexpected
    exceptions are logged via ExceptionMessage and re-raised so the job
    is marked failed.  The node lock is always released.
    """
    try:
        integration_id = self.parameters['TendrlContext.integration_id']
        # Lock nodes
        create_cluster_utils.acquire_node_lock(self.parameters)
        NS.tendrl_context = NS.tendrl_context.load()

        # TODO(team) when Tendrl supports create/expand/shrink cluster
        # setup passwordless ssh for all gluster nodes with given
        # integration_id (check
        # /indexes/tags/tendrl/integration/$integration_id for list of
        # nodes in cluster
        node_list = self.parameters['Node[]']
        cluster_nodes = []
        if len(node_list) > 1:
            # This is the master node for this flow
            for node in node_list:
                if NS.node_context.node_id != node:
                    new_params = self.parameters.copy()
                    new_params['Node[]'] = [node]

                    # create same flow for each node in node list except
                    # $this
                    payload = {"tags": ["tendrl/node_%s" % node],
                               "run": "tendrl.flows.ImportCluster",
                               "status": "new",
                               "parameters": new_params,
                               "parent": self.parameters['job_id'],
                               "type": "node"
                               }
                    _job_id = str(uuid.uuid4())
                    cluster_nodes.append(_job_id)
                    Job(job_id=_job_id,
                        status="new",
                        payload=payload).save()
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Importing (job: %s) Node %s "
                                           "to cluster %s" %
                                           (_job_id, node, integration_id)
                            }
                        )
                    )

        # Check if minimum required version of underlying gluster
        # cluster met. If not fail the import task
        cluster_ver = \
            NS.tendrl_context.sds_version.split('.')
        maj_ver = cluster_ver[0]
        # Minor part may carry a non-numeric suffix; keep leading digits.
        min_ver = re.findall(r'\d+', cluster_ver[1])[0]
        reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs()[
            'namespace.tendrl'
        ]['min_reqd_gluster_ver']
        req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
        Event(
            Message(
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message": "Check: Minimum required version ("
                               "%s.%s.%s) of Gluster Storage" %
                               (req_maj_ver, req_min_ver, req_rel)
                }
            )
        )
        # Major/minor comparison only; the release part is not checked
        # in this revision.
        ver_check_failed = False
        if int(maj_ver) < int(req_maj_ver):
            ver_check_failed = True
        else:
            if int(maj_ver) == int(req_maj_ver) and \
                    int(min_ver) < int(req_min_ver):
                ver_check_failed = True
        if ver_check_failed:
            Event(
                Message(
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Error: Minimum required version "
                                   "(%s.%s.%s) "
                                   "doesnt match that of detected Gluster "
                                   "Storage (%s.%s.%s)" %
                                   (req_maj_ver, req_min_ver, req_rel,
                                    maj_ver, min_ver, 0)
                    }
                )
            )
            raise FlowExecutionFailedError(
                "Detected gluster version: %s"
                " is lesser than required version: %s" % (
                    NS.tendrl_context.sds_version,
                    reqd_gluster_ver
                )
            )

        if not import_gluster(self.parameters):
            return False

        if len(node_list) > 1:
            Event(
                Message(
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Waiting for participant nodes %s to "
                                   "be "
                                   "imported %s" % (node_list,
                                                    integration_id)
                    }
                )
            )
            loop_count = 0
            # Wait for (no of nodes) * 6 minutes for import to complete
            wait_count = (len(node_list) - 1) * 36
            while True:
                parent_job = Job(job_id=self.parameters['job_id']).load()
                if loop_count >= wait_count:
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Import jobs not yet complete "
                                           "on all nodes. Timing out. (%s, %s)"
                                           % (str(node_list), integration_id)
                            }
                        )
                    )
                    return False
                time.sleep(10)
                finished = True
                for child_job_id in parent_job.children:
                    child_job = Job(job_id=child_job_id).load()
                    if child_job.status != "finished":
                        finished = False
                        break
                if finished:
                    break
                else:
                    loop_count += 1
                    continue
    except Exception as ex:
        # For traceback
        Event(
            ExceptionMessage(
                priority="error",
                publisher=NS.publisher_id,
                payload={
                    "message": ex.message,
                    "exception": ex
                }
            )
        )
        # raising exception to mark job as failed
        raise ex
    finally:
        # release lock
        create_cluster_utils.release_node_lock(self.parameters)
    return True