def load(*args):
    parent = Job(job_id="Test Parent Job Id",
                 status="Active",
                 payload=maps.NamedDict())
    if args[0]:
        parent.errors = "Error Message"
    parent.children = ["Test_child"]
    return parent
def run(self):
    # Generate the journal mapping for the nodes
    mapping = utils.generate_journal_mapping(
        self.parameters['Cluster.node_configuration'],
        integration_id=self.parameters.get("TendrlContext.integration_id"))

    # Update the job output with the generated mapping
    job = Job(job_id=self.job_id).load()
    job.output[self.__class__.__name__] = json.dumps(mapping)
    job.save()
def run(self):
    integration_id = self.parameters['TendrlContext.integration_id']
    if "provisioner/%s" % integration_id not in NS.node_context.tags:
        return True

    _job_id = str(uuid.uuid4())
    payload = {
        "tags": ["tendrl/integration/monitoring"],
        "run": "monitoring.flows.SetupClusterAlias",
        "status": "new",
        "parameters": self.parameters,
        "type": "monitoring",
        "parent": self.parameters['job_id']
    }
    Job(job_id=_job_id, status="new", payload=payload).save()

    # Wait up to 2 minutes (24 * 5s) for the job to complete
    loop_count = 0
    wait_count = 24
    while True:
        child_job_failed = False
        if loop_count >= wait_count:
            logger.log(
                "error",
                NS.publisher_id,
                {
                    "message": "Setting up cluster alias "
                               "not yet complete. Timing out. (%s)" %
                               integration_id
                },
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
            )
            return False
        time.sleep(5)
        finished = True
        job = Job(job_id=_job_id).load()
        if job.status not in ["finished", "failed"]:
            finished = False
        elif job.status == "failed":
            child_job_failed = True
        if finished:
            break
        else:
            loop_count += 1
            continue

    if child_job_failed:
        _msg = "Child job setting up cluster alias " \
               "failed %s" % _job_id
        logger.log("error", NS.publisher_id,
                   {"message": _msg},
                   job_id=self.parameters['job_id'],
                   flow_id=self.parameters['flow_id'])
        return False
    return True
def test_constructor():
    '''Testing the constructor involves checking that all needed
    variables are declared and initialized
    '''
    job = Job()
    assert job.job_id is None

    # Passing dummy values
    job = Job(job_id="Test job id",
              payload="Test Payload",
              status=True,
              errors=None,
              children=None,
              locked_by=None,
              output="Job Done")
    assert job.output == "Job Done"
def initiate_config_generation(node_det):
    try:
        job_params = {
            'node_ids': [node_det.get('node_id')],
            "run": 'node_monitoring.flows.ConfigureCollectd',
            'type': 'monitoring',
            "parameters": {
                'plugin_name': node_det['plugin'],
                'plugin_conf_params': json.dumps(
                    node_det['plugin_conf']).encode('utf-8'),
                'Node.fqdn': node_det['fqdn'],
                'Service.name': 'collectd',
            },
        }
        Job(
            job_id=str(uuid.uuid4()),
            status='new',
            payload=job_params,
        ).save()
    except (EtcdException, EtcdConnectionFailed, Exception) as ex:
        raise TendrlPerformanceMonitoringException(
            'Failed to initiate monitoring configuration for plugin '
            '%s on %s with parameters %s. Error: %s' % (
                node_det['plugin'],
                node_det['fqdn'],
                json.dumps(node_det['plugin_conf']),
                str(ex)))
def test_gluster_create_ssh_setup_jobs_fails2():
    testParams = maps.NamedDict()
    testParams['Node[]'] = []
    testParams["job_id"] = "test_id"
    testParams["flow_id"] = "test_id"
    with patch.object(etcd, "Client", return_value=etcd.Client()) as client:
        obj = importlib.import_module("tendrl.commons.tests.fixtures.client")
        NS._int.wclient = obj.Client()
        NS._int.client = client
        NS._int.watchers = dict()
        NS.tendrl = maps.NamedDict()
        NS.tendrl.objects = maps.NamedDict()
        NS.tendrl.objects.Job = Job(job_id=1,
                                    status="",
                                    payload=maps.NamedDict()).save()
        NS.gluster_provisioner = importlib.import_module(
            "tendrl.commons.tests.fixtures.plugin").Plugin()
        with patch.object(NS.gluster_provisioner, 'setup',
                          return_value=["", ""]):
            with patch.object(authorize_key, 'AuthorizeKey',
                              return_value=MockKey()):
                with pytest.raises(FlowExecutionFailedError):
                    utils.gluster_create_ssh_setup_jobs(
                        testParams, skip_current_node=False)
def acquire_node_lock(parameters):
    # Check that every node_id is present
    for node in parameters['Node[]']:
        try:
            NS._int.client.read("/nodes/%s" % node)
        except EtcdKeyNotFound:
            raise FlowExecutionFailedError(
                "Unknown Node %s, cannot lock" % node)

    # Check whether this job is a parent or a child
    job = Job(job_id=parameters['job_id']).load()
    p_job_id = None
    if "parent" in job.payload:
        p_job_id = job.payload['parent']

    # First pass: fail fast if any node is locked by an unrelated job
    for node in parameters['Node[]']:
        key = "/nodes/%s/locked_by" % node
        try:
            lock_owner_job = NS._int.client.read(key).value
            # If the parent job has acquired the lock on the
            # participating nodes, the child job may proceed
            if p_job_id == lock_owner_job:
                continue
            else:
                raise FlowExecutionFailedError(
                    "Cannot proceed further, "
                    "Node (%s) is already locked "
                    "by Job (%s)" % (node, lock_owner_job))
        except EtcdKeyNotFound:
            # Node is not locked yet, check the remaining nodes
            continue

    # Second pass: acquire the lock on every node that is not already
    # locked by the parent job
    for node in parameters['Node[]']:
        # Compute the key per node; reusing the key from the previous
        # loop would read the wrong node's lock
        key = "/nodes/%s/locked_by" % node
        try:
            lock_owner_job = NS._int.client.read(key).value
            if p_job_id == lock_owner_job:
                continue
        except EtcdKeyNotFound:
            lock_owner_job = str(parameters["job_id"])
            NS._int.client.write(key, lock_owner_job)
            Event(
                Message(
                    job_id=parameters['job_id'],
                    flow_id=parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Acquired lock (%s) for Node (%s)" % (
                            lock_owner_job, node)
                    }
                )
            )
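# Hedged usage sketch (not from the source tree): acquire_node_lock() is
# meant to be paired with a release in a finally block, as the import and
# expand flows below do via create_cluster_utils.release_node_lock().
# `parameters` is assumed to carry 'Node[]', 'job_id' and 'flow_id'.
def locked_flow_sketch(parameters):
    acquire_node_lock(parameters)
    try:
        pass  # run the flow's atoms against the locked nodes here
    finally:
        release_node_lock(parameters)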
def update_brick_status(self, fqdn, integration_id, status):
    _job_id = str(uuid.uuid4())
    _params = {
        "TendrlContext.integration_id": integration_id,
        "Node.fqdn": fqdn,
        "Brick.status": status
    }
    _job_payload = {
        "tags": ["tendrl/integration/{0}".format(integration_id)],
        "run": "gluster.flows.UpdateBrickStatus",
        "status": "new",
        "parameters": _params,
        "type": "sds"
    }
    Job(job_id=_job_id, status="new", payload=_job_payload).save()
def run():
    try:
        nodes = NS._int.client.read("/nodes")
    except etcd.EtcdKeyNotFound:
        return

    for node in nodes.leaves:
        node_id = node.key.split('/')[-1]
        try:
            NS._int.wclient.write(
                "/nodes/{0}/NodeContext/status".format(node_id),
                "DOWN",
                prevExist=False)
            _node_context = NS.tendrl.objects.NodeContext(
                node_id=node_id).load()
            _tc = NS.tendrl.objects.TendrlContext(node_id=node_id).load()
            _cluster = NS.tendrl.objects.Cluster(
                integration_id=_tc.integration_id).load()
            # Remove stale provisioner tag
            if _cluster.is_managed == "yes":
                _tag = "provisioner/%s" % _cluster.integration_id
                if _tag in _node_context.tags:
                    _index_key = "/indexes/tags/%s" % _tag
                    _node_context.tags.remove(_tag)
                    _node_context.save()
                    etcd_utils.delete(_index_key)
                    _msg = "node_sync, STALE provisioner node found! " \
                           "re-configuring monitoring (job-id: %s) " \
                           "on this node"
                    payload = {
                        "tags": ["tendrl/node_%s" % node_id],
                        "run": "tendrl.flows.ConfigureMonitoring",
                        "status": "new",
                        "parameters": {
                            'TendrlContext.integration_id':
                                _tc.integration_id
                        },
                        "type": "node"
                    }
                    _job_id = str(uuid.uuid4())
                    Job(job_id=_job_id,
                        status="new",
                        payload=payload).save()
                    Event(
                        Message(priority="debug",
                                publisher=NS.publisher_id,
                                payload={"message": _msg % _job_id}))
        except etcd.EtcdAlreadyExist:
            pass
    return
def initiate_config_generation(node_det):
    try:
        plugin = NodeMonitoringPlugin(plugin_name=node_det['plugin'],
                                      node_id=node_det.get('node_id'))
        if plugin.exists():
            # More capabilities, such as a fixed number of retries, can be
            # added here. This is the common point through which all
            # monitoring plugin configuration jobs land in etcd, so any
            # action taken here is reflected in all of them.
            return
        job_params = {
            'tags': ["tendrl/node_%s" % node_det.get('node_id')],
            "run": 'node_monitoring.flows.ConfigureCollectd',
            'type': 'monitoring',
            "parameters": {
                'plugin_name': node_det['plugin'],
                'plugin_conf_params': json.dumps(
                    node_det['plugin_conf']).encode('utf-8'),
                'Node.fqdn': node_det['fqdn'],
                'Service.name': 'collectd',
            },
        }
        job_id = str(uuid.uuid4())
        Job(
            job_id=job_id,
            status='new',
            payload=job_params,
        ).save()
        NodeMonitoringPlugin(plugin_name=node_det['plugin'],
                             node_id=node_det.get('node_id'),
                             job_id=job_id).save(update=False)
    except (EtcdException, AttributeError) as ex:
        Event(
            ExceptionMessage(
                priority="debug",
                publisher=NS.publisher_id,
                payload={
                    "message": 'Failed to initiate monitoring configuration'
                               ' for plugin %s on %s with parameters %s.' %
                               (node_det['plugin'], node_det['fqdn'],
                                json.dumps(node_det['plugin_conf'])),
                    "exception": ex
                }))
def initiate_config_generation(node_det):
    try:
        plugin = NodeMonitoringPlugin(
            plugin_name=node_det['plugin'],
            node_id=node_det.get('node_id')
        )
        if plugin.exists():
            # More capabilities, such as a fixed number of retries, can be
            # added here. This is the common point through which all
            # monitoring plugin configuration jobs land in etcd, so any
            # action taken here is reflected in all of them.
            return
        job_params = {
            'node_ids': [node_det.get('node_id')],
            "run": 'node_monitoring.flows.ConfigureCollectd',
            'type': 'monitoring',
            "parameters": {
                'plugin_name': node_det['plugin'],
                'plugin_conf_params': json.dumps(
                    node_det['plugin_conf']
                ).encode('utf-8'),
                'Node.fqdn': node_det['fqdn'],
                'Service.name': 'collectd',
            },
        }
        job_id = str(uuid.uuid4())
        Job(
            job_id=job_id,
            status='new',
            payload=job_params,
        ).save()
        NodeMonitoringPlugin(
            plugin_name=node_det['plugin'],
            node_id=node_det.get('node_id'),
            job_id=job_id
        ).save(update=False)
    except (EtcdException, EtcdConnectionFailed, Exception) as ex:
        raise TendrlPerformanceMonitoringException(
            'Failed to initiate monitoring configuration for plugin '
            '%s on %s with parameters %s. Error: %s' % (
                node_det['plugin'],
                node_det['fqdn'],
                json.dumps(node_det['plugin_conf']),
                str(ex)
            )
        )
def update_dashboard(res_name, res_type, integration_id, action):
    _job_id = str(uuid.uuid4())
    _params = {
        "TendrlContext.integration_id": NS.tendrl_context.integration_id,
        "TendrlContext.cluster_name": NS.tendrl_context.cluster_name,
        "Trigger.resource_name": res_name,
        "Trigger.resource_type": res_type,
        "Trigger.action": action
    }
    _job_payload = {
        "tags": ["tendrl/integration/monitoring"],
        "run": "monitoring.flows.UpdateDashboard",
        "status": "new",
        "parameters": _params,
        "type": "monitoring"
    }
    Job(job_id=_job_id, status="new", payload=_job_payload).save()
    return _job_id
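# Hypothetical usage sketch for update_dashboard() above; "vol1" and
# "volume" are made-up example values. The returned job id can be polled
# at /queue/<job_id>/status, the same pattern the flows below use.
def update_dashboard_example():
    return update_dashboard(
        res_name="vol1",
        res_type="volume",
        integration_id=NS.tendrl_context.integration_id,
        action="add")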
def ceph_create_ssh_setup_jobs(parameters):
    node_list = parameters['Node[]']
    ssh_job_ids = []
    ssh_setup_script = NS.ceph_provisioner.get_plugin().setup()
    if len(node_list) > 0:
        for node in node_list:
            if NS.node_context.node_id != node:
                new_params = parameters.copy()
                new_params['Node[]'] = [node]
                new_params['ssh_setup_script'] = ssh_setup_script
                # Create the same flow for each node in the node list
                # except this one
                payload = {
                    "tags": ["tendrl/node_%s" % node],
                    "run": "tendrl.flows.SetupSsh",
                    "status": "new",
                    "parameters": new_params,
                    "parent": parameters['job_id'],
                    "type": "node"
                }
                _job_id = str(uuid.uuid4())
                Job(job_id=_job_id,
                    status="new",
                    payload=payload).save()
                ssh_job_ids.append(_job_id)
                Event(
                    Message(
                        job_id=parameters['job_id'],
                        flow_id=parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": "Created SSH setup job %s for"
                                            " node %s" % (_job_id, node)}
                    )
                )
    return ssh_job_ids
def test_save(mock_save):
    job = Job()
    payload = maps.NamedDict()
    payload['parent'] = "Test Parent Job Id"
    job.payload = payload

    with patch.object(objects.BaseObject, 'load') as mock_load:
        mock_load.return_value = load(False)
        job.status = "true"
        job.save()

    with patch.object(objects.BaseObject, 'load') as mock_load:
        mock_load.return_value = load(False)
        job.status = "failed"
        job.save()

    with patch.object(objects.BaseObject, 'load') as mock_load:
        mock_load.return_value = load(True)
        job.status = "failed"
        job.save()
def test_render():
    job = Job()
    assert job.render() is not None
def process_job(job):
    jid = job.key.split('/')[-1]
    job_status_key = "/queue/%s/status" % jid
    job_lock_key = "/queue/%s/locked_by" % jid
    NS.node_context = NS.node_context.load()

    # Check that the job is not already locked by some agent
    try:
        _locked_by = NS._int.client.read(job_lock_key).value
        if _locked_by:
            return
    except etcd.EtcdKeyNotFound:
        pass

    # Check that the job is not already "finished" or "processing"
    try:
        _status = NS._int.client.read(job_status_key).value
        if _status in ["finished", "processing"]:
            return
    except etcd.EtcdKeyNotFound:
        pass

    # The tendrl-node-agent tagged as tendrl/monitor will ensure
    # that "new" jobs older than 10 minutes are timed out and marked
    # as "failed" (the parent job of these jobs will also be
    # marked as "failed")
    if "tendrl/monitor" in NS.node_context.tags:
        _job_valid_until_key = "/queue/%s/valid_until" % jid
        _valid_until = None
        try:
            _valid_until = NS._int.client.read(
                _job_valid_until_key).value
        except etcd.EtcdKeyNotFound:
            pass

        if _valid_until:
            _now_epoch = (time_utils.now() - datetime.datetime(
                1970, 1, 1).replace(tzinfo=utc)).total_seconds()
            if int(_now_epoch) >= int(_valid_until):
                # Job has had "new" status for 10 minutes, mark
                # status as "failed" and set Job.errors to "Timed out"
                try:
                    NS._int.wclient.write(job_status_key,
                                          "failed",
                                          prevValue="new")
                except etcd.EtcdCompareFailed:
                    pass
                else:
                    job = Job(job_id=jid).load()
                    _msg = str("Timed-out (>10min as 'new')")
                    job.errors = _msg
                    job.save()
                    return
        else:
            _now_plus_10 = time_utils.now() + datetime.timedelta(minutes=10)
            _epoch_start = datetime.datetime(1970, 1, 1).replace(tzinfo=utc)
            # noinspection PyTypeChecker
            _now_plus_10_epoch = (_now_plus_10 -
                                  _epoch_start).total_seconds()
            NS._int.wclient.write(_job_valid_until_key,
                                  int(_now_plus_10_epoch))

    job = Job(job_id=jid).load()
    if job.payload["type"] == NS.type and \
            job.status == "new":
        # Job routing
        # Flows created by tendrl-api use 'tags' from the flow
        # definition to target jobs
        _tag_match = False
        if job.payload.get("tags", []):
            for flow_tag in job.payload['tags']:
                if flow_tag in NS.node_context.tags:
                    _tag_match = True

        if not _tag_match:
            _job_tags = ", ".join(job.payload.get("tags", []))
            _msg = "Node (%s)(type: %s)(tags: %s) will not " \
                   "process job-%s (tags: %s)" % \
                   (NS.node_context.node_id, NS.type,
                    NS.node_context.tags, jid, _job_tags)
            Event(
                Message(priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": _msg}))
            return

        job_status_key = "/queue/%s/status" % job.job_id
        job_lock_key = "/queue/%s/locked_by" % job.job_id
        try:
            lock_info = dict(node_id=NS.node_context.node_id,
                             fqdn=NS.node_context.fqdn,
                             tags=NS.node_context.tags,
                             type=NS.type)
            NS._int.wclient.write(job_lock_key,
                                  json.dumps(lock_info))
            NS._int.wclient.write(job_status_key,
                                  "processing",
                                  prevValue="new")
        except etcd.EtcdCompareFailed:
            # Job is already being processed by some other tendrl agent
            return

        the_flow = None
        try:
            current_ns, flow_name, obj_name = \
                _extract_fqdn(job.payload['run'])

            if obj_name:
                runnable_flow = current_ns.ns.get_obj_flow(
                    obj_name, flow_name)
            else:
                runnable_flow = current_ns.ns.get_flow(flow_name)

            the_flow = runnable_flow(parameters=job.payload['parameters'],
                                     job_id=job.job_id)
            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": "Processing Job %s" %
                                            job.job_id}))

            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": "Running Flow %s" %
                                            job.payload['run']}))
            the_flow.run()
            try:
                NS._int.wclient.write(job_status_key,
                                      "finished",
                                      prevValue="processing")
            except etcd.EtcdCompareFailed:
                # This should not happen!
                _msg = "Cannot mark job as 'finished', " \
                       "current job status invalid"
                raise FlowExecutionFailedError(_msg)

            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": "Job (%s): Finished "
                                            "Flow %s" %
                                            (job.job_id,
                                             job.payload['run'])}))
        except (FlowExecutionFailedError,
                AtomExecutionFailedError,
                Exception) as e:
            _trace = str(traceback.format_exc(e))
            _msg = "Failure in Job %s Flow %s with error:" % \
                   (job.job_id, job.payload['run'])
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={"message": _msg + _trace,
                                          "exception": e}))
            if the_flow:
                Event(
                    Message(job_id=job.job_id,
                            flow_id=the_flow.parameters['flow_id'],
                            priority="error",
                            publisher=NS.publisher_id,
                            payload={"message": _msg + "\n" + _trace}))
            else:
                Event(
                    Message(priority="error",
                            publisher=NS.publisher_id,
                            payload={"message": _msg + "\n" + _trace}))

            try:
                NS._int.wclient.write(job_status_key,
                                      "failed",
                                      prevValue="processing")
            except etcd.EtcdCompareFailed:
                # This should not happen!
                _msg = "Cannot mark job as 'failed', current " \
                       "job status invalid"
                raise FlowExecutionFailedError(_msg)
            else:
                job = job.load()
                job.errors = _trace
                job.save()
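# Illustrative sketch of the job document process_job() consumes from
# /queue/<job_id>; the field names come from the code above, the values
# are made up.
example_job_payload = {
    "type": "node",                       # compared against NS.type
    "run": "tendrl.flows.ImportCluster",  # resolved via _extract_fqdn()
    "status": "new",
    "tags": ["tendrl/node_<node_id>"],    # routing: matched to node tags
    "parameters": {"TendrlContext.integration_id": "<integration-id>"},
}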
def load_job_new(*args):
    return Job(job_id="uuid", status='new')
def gluster_create_ssh_setup_jobs(parameters, skip_current_node=False):
    node_list = copy.deepcopy(parameters['Node[]'])
    ssh_job_ids = []
    ssh_key, err = NS.gluster_provisioner.get_plugin().setup()
    if err != "":
        _msg = "Error generating ssh key on node %s" % \
               NS.node_context.node_id
        Event(
            Message(
                job_id=parameters['job_id'],
                flow_id=parameters['flow_id'],
                priority="error",
                publisher=NS.publisher_id,
                payload={"message": _msg}
            )
        )
        raise FlowExecutionFailedError(_msg)

    if not skip_current_node:
        ret_val, err = authorize_key.AuthorizeKey(ssh_key).run()
        if ret_val is not True or err != "":
            _msg = "Error adding authorized key for node %s" % \
                   NS.node_context.node_id
            Event(
                Message(
                    job_id=parameters['job_id'],
                    flow_id=parameters['flow_id'],
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={"message": _msg}
                )
            )
            raise FlowExecutionFailedError(_msg)
        node_list.remove(NS.node_context.node_id)

    for node in node_list:
        if node == NS.node_context.node_id:
            continue
        new_params = parameters.copy()
        new_params['Node[]'] = [node]
        new_params['ssh_key'] = ssh_key
        # Create the same flow for each node from the list except this one
        payload = {
            "tags": ["tendrl/node_%s" % node],
            "run": "tendrl.flows.AuthorizeSshKey",
            "status": "new",
            "parameters": new_params,
            "parent": parameters['job_id'],
            "type": "node"
        }
        _job_id = str(uuid.uuid4())
        Job(
            job_id=_job_id,
            status="new",
            payload=payload
        ).save()
        ssh_job_ids.append(_job_id)
        Event(
            Message(
                job_id=parameters['job_id'],
                flow_id=parameters['flow_id'],
                priority="info",
                publisher=NS.publisher_id,
                payload={"message": "Created SSH setup job %s for node %s" %
                                    (_job_id, node)}
            )
        )
    return ssh_job_ids
def run(self):
    integration_id = self.parameters['TendrlContext.integration_id']
    if integration_id is None:
        _msg = "TendrlContext.integration_id cannot be empty"
        raise FlowExecutionFailedError(_msg)
    if "Cluster.node_configuration" not in self.parameters.keys():
        _msg = "Cluster.node_configuration cannot be empty"
        raise FlowExecutionFailedError(_msg)

    ssh_job_ids = []
    ssh_setup_script = NS.ceph_provisioner.get_plugin().setup()
    for node_id in self.parameters["Cluster.node_configuration"].keys():
        new_params = dict()
        new_params['Node[]'] = [node_id]
        new_params['ssh_setup_script'] = ssh_setup_script
        payload = {
            "tags": ["tendrl/node_%s" % node_id],
            "run": "tendrl.flows.SetupSsh",
            "status": "new",
            "parameters": new_params,
            "parent": self.parameters['job_id'],
            "type": "node"
        }
        _job_id = str(uuid.uuid4())
        Job(job_id=_job_id,
            status="new",
            payload=payload).save()
        ssh_job_ids.append(_job_id)
        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Created SSH setup job %s for node"
                                   " %s" % (_job_id, node_id)
                    }))

    while True:
        time.sleep(3)
        all_status = {}
        for job_id in ssh_job_ids:
            # noinspection PyUnresolvedReferences
            all_status[job_id] = NS._int.client.read(
                "/queue/%s/status" % job_id).value

        _failed = {_jid: status for _jid, status in
                   all_status.iteritems() if status == "failed"}
        if _failed:
            raise FlowExecutionFailedError(
                "SSH setup failed for jobs %s cluster %s" %
                (str(_failed), integration_id))
        if all([status == "finished" for status in all_status.values()]):
            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "SSH setup completed for all nodes"
                        }))
            break

    Event(
        Message(job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message": "Adding OSDs to ceph cluster %s" %
                               integration_id
                }))
    add_osds(self.parameters)
def run(self):
    self.pre_run = []
    self.atoms = []
    self.post_run = []

    integration_id = self.parameters['TendrlContext.integration_id']
    tendrl_ns.tendrl_context.integration_id = integration_id
    tendrl_ns.tendrl_context.save()

    node_list = self.parameters['Node[]']
    if len(node_list) > 1:
        # This is the master node for this flow
        for node in node_list:
            if tendrl_ns.node_context.node_id != node:
                new_params = self.parameters.copy()
                new_params['Node[]'] = [node]
                # Create the same flow for each node in the node list
                # except this one
                # TODO(team) The .save() below needs to save the job
                # exactly as the API does
                Job(job_id=str(uuid.uuid4()),
                    integration_id=integration_id,
                    run="tendrl.node_agent.flows.ImportCluster",
                    status="new",
                    parameters=new_params,
                    type="node",
                    parent=self.parameters['request_id'],
                    node_ids=[node]).save()
                Event(
                    Message(
                        priority="info",
                        publisher=tendrl_ns.publisher_id,
                        payload={
                            "message": "Import cluster job created on node"
                                       " %s" % node
                        },
                        request_id=self.parameters['request_id'],
                        flow_id=self.uuid,
                        cluster_id=tendrl_ns.tendrl_context.integration_id,
                    )
                )

    Event(
        Message(
            priority="info",
            publisher=tendrl_ns.publisher_id,
            payload={
                "message": "Import cluster job started on node %s" %
                           tendrl_ns.node_context.fqdn
            },
            request_id=self.parameters['request_id'],
            flow_id=self.uuid,
            cluster_id=tendrl_ns.tendrl_context.integration_id,
        )
    )

    sds_name = self.parameters['DetectedCluster.sds_pkg_name']
    if "ceph" in sds_name.lower():
        node_context = tendrl_ns.node_context.load()
        if "mon" in node_context.tags:
            import_ceph(
                tendrl_ns.tendrl_context.integration_id,
                self.parameters['request_id'],
                self.uuid
            )
    else:
        import_gluster(
            tendrl_ns.tendrl_context.integration_id,
            self.parameters['request_id'],
            self.uuid
        )

    # ImportCluster's run() should not return until the new cluster entry
    # is updated in etcd, because the job is marked as finished once this
    # function returns. Returning early could lead to inconsistency in the
    # API functionality. The loop below waits for the cluster details
    # to be updated in etcd.
    while True:
        gevent.sleep(2)
        try:
            tendrl_ns.etcd_orm.client.read("/clusters/%s" % integration_id)
            break
        except etcd.EtcdKeyNotFound:
            continue
def load_job_failed(*args):
    return Job(job_id="uuid", status='failed')
def load_unfinished_job(*args):
    return Job(job_id="uuid", status='in_progress')
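# Hedged sketch: the load_job_* helpers above and below are stand-ins for
# BaseObject.load in tests, mirroring the patching style of test_save.
def test_load_stub_sketch():
    with patch.object(objects.BaseObject, 'load', load_unfinished_job):
        assert Job(job_id="uuid").load().status == 'in_progress'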
def run(self):
    Event(
        Message(
            priority="info",
            publisher=NS.publisher_id,
            payload={
                "message": "Generating brick mapping for gluster volume"
            },
            job_id=self.parameters["job_id"],
            flow_id=self.parameters["flow_id"],
            cluster_id=NS.tendrl_context.integration_id,
        )
    )
    brick_count = self.parameters.get('Volume.brick_count')
    subvol_size = self.parameters.get('Volume.subvol_size')
    message = ""

    # Get brick_count bricks from each of the selected nodes
    nodes = {}
    for node in self.parameters.get('Cluster.node_configuration'):
        key = "nodes/%s/NodeContext/fqdn" % node
        host = NS._int.client.read(key).value
        nodes[host] = []
    hosts = NS._int.client.read(
        '/clusters/%s/Bricks/free/' % NS.tendrl_context.integration_id
    )
    for host in hosts.leaves:
        host = host.key.split("/")[-1]
        bricks = NS._int.client.read(
            '/clusters/%s/Bricks/free/%s' % (
                NS.tendrl_context.integration_id,
                host
            )
        )
        for brick in bricks.leaves:
            brick = brick.key.split("/")[-1]
            if host in nodes:
                if len(nodes[host]) < brick_count:
                    nodes[host].append(brick)

    # Form a brick list such that filling sub-volumes with bricks
    # from this list honours the failure domains
    brick_list = []
    total_bricks = len(nodes) * brick_count
    for iterator in range(total_bricks):
        brick_list.append("")
    counter = 0
    node_count = len(nodes)
    for key, value in nodes.iteritems():
        if len(value) < brick_count:
            message = ("Host %s has %s bricks which is less than"
                       " bricks per host %s" % (key, len(value),
                                                brick_count))
            job = Job(job_id=self.parameters["job_id"]).load()
            res = {"message": message, "result": [[]], "optimal": False}
            job.output["GenerateBrickMapping"] = json.dumps(res)
            job.save()
            return False
        for i in range(brick_count):
            brick_list[node_count * i + counter] = value[i]
        counter += 1

    # If the total number of bricks available is less than the
    # sub-volume size, return accordingly
    if len(brick_list) < subvol_size:
        message = "Total bricks available %s less than subvol_size %s" % (
            len(brick_list),
            subvol_size
        )
        job = Job(job_id=self.parameters["job_id"]).load()
        res = {"message": message, "result": [[]], "optimal": False}
        job.output["GenerateBrickMapping"] = json.dumps(res)
        job.save()
        return False

    # Fill the result list with bricks from brick_list; keep going
    # until the brick list is exhausted, so the number of sub-volumes
    # is the maximum possible for the available list
    result = []
    lower_bound = 0
    upper_bound = subvol_size
    while True:
        if upper_bound > len(brick_list):
            break
        subvol = brick_list[lower_bound:upper_bound]
        result.append(subvol)
        lower_bound = upper_bound
        upper_bound += subvol_size

    # Check whether the mapping is optimal as per the expected
    # failure domain
    optimal = True
    if node_count < subvol_size:
        optimal = False

    # Write the result back to the job
    job = Job(job_id=self.parameters["job_id"]).load()
    res = {"message": message, "result": result, "optimal": optimal}
    job.output["GenerateBrickMapping"] = json.dumps(res)
    job.save()
    return True
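# Worked sketch of the brick interleaving above with made-up data: two
# hosts with two bricks each; the index node_count * i + counter
# alternates hosts so each sub-volume spans both failure domains.
def brick_interleave_example():
    nodes = {"host1": ["b1", "b2"], "host2": ["b3", "b4"]}
    brick_count = 2
    node_count = len(nodes)
    brick_list = [""] * (node_count * brick_count)
    counter = 0
    for host in sorted(nodes):  # sorted only to keep the example stable
        for i in range(brick_count):
            brick_list[node_count * i + counter] = nodes[host][i]
        counter += 1
    # sub-volumes of size 2 become [["b1", "b3"], ["b2", "b4"]]
    assert brick_list == ["b1", "b3", "b2", "b4"]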
def run(self):
    try:
        # Lock nodes
        create_cluster_utils.acquire_node_lock(self.parameters)
        integration_id = self.parameters['TendrlContext.integration_id']
        sds_name = self.parameters['DetectedCluster.sds_pkg_name']

        if not self.parameters.get('import_after_expand', False) and \
                not self.parameters.get('import_after_create', False):

            # Check if gdeploy is already provisioned in this cluster;
            # if not, it has to be provisioned here
            if sds_name.find("gluster") > -1 and \
                not self.parameters.get("gdeploy_provisioned", False) and \
                not self._probe_and_mark_provisioner(
                    self.parameters["Node[]"], integration_id
                ):
                create_cluster_utils.install_gdeploy()
                create_cluster_utils.install_python_gdeploy()
                ssh_job_ids = \
                    create_cluster_utils.gluster_create_ssh_setup_jobs(
                        self.parameters)

                while True:
                    gevent.sleep(3)
                    all_status = {}
                    for job_id in ssh_job_ids:
                        all_status[job_id] = NS._int.client.read(
                            "/queue/%s/status" % job_id).value

                    _failed = {_jid: status for _jid, status in
                               all_status.iteritems()
                               if status == "failed"}
                    if _failed:
                        raise AtomExecutionFailedError(
                            "SSH setup failed for jobs %s cluster %s" %
                            (str(_failed), integration_id))
                    if all([status == "finished" for status in
                            all_status.values()]):
                        Event(
                            Message(
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message": "SSH setup completed for "
                                               "all nodes in cluster %s" %
                                               integration_id
                                }))
                        # Set this node as the gluster provisioner
                        tags = ["provisioner/%s" % integration_id]
                        NS.node_context = NS.node_context.load()
                        tags += NS.node_context.tags
                        NS.node_context.tags = list(set(tags))
                        NS.node_context.save()

                        # Set gdeploy_provisioned to True so that no
                        # other node tries to configure gdeploy
                        self.parameters['gdeploy_provisioned'] = True
                        break

        NS.tendrl_context = NS.tendrl_context.load()
        NS.tendrl_context.integration_id = integration_id
        _detected_cluster = NS.tendrl.objects.DetectedCluster().load()
        NS.tendrl_context.cluster_id = \
            _detected_cluster.detected_cluster_id
        NS.tendrl_context.cluster_name = \
            _detected_cluster.detected_cluster_name
        NS.tendrl_context.sds_name = _detected_cluster.sds_pkg_name
        NS.tendrl_context.sds_version = _detected_cluster.sds_pkg_version
        NS.tendrl_context.save()
        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Registered Node %s with cluster %s" %
                                   (NS.node_context.node_id,
                                    NS.tendrl_context.integration_id)
                    }))

        node_list = self.parameters['Node[]']
        cluster_nodes = []
        if len(node_list) > 1:
            # This is the master node for this flow
            for node in node_list:
                if NS.node_context.node_id != node:
                    new_params = self.parameters.copy()
                    new_params['Node[]'] = [node]
                    # Create the same flow for each node in the node
                    # list except this one
                    payload = {
                        "tags": ["tendrl/node_%s" % node],
                        "run": "tendrl.flows.ImportCluster",
                        "status": "new",
                        "parameters": new_params,
                        "parent": self.parameters['job_id'],
                        "type": "node"
                    }
                    _job_id = str(uuid.uuid4())
                    cluster_nodes.append(_job_id)
                    Job(job_id=_job_id,
                        status="new",
                        payload=payload).save()
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Importing (job: %s) Node %s "
                                           "to cluster %s" %
                                           (_job_id, node, integration_id)
                            }))

        if "ceph" in sds_name.lower():
            node_context = NS.node_context.load()
            is_mon = False
            for tag in node_context.tags:
                mon_tag = NS.compiled_definitions.get_parsed_defs()[
                    'namespace.tendrl']['tags']['ceph-mon']
                if mon_tag in tag:
                    is_mon = True
            if is_mon:
                # Check if the minimum required version of the underlying
                # ceph cluster is met; if not, fail the import task
                detected_cluster = \
                    NS.tendrl.objects.DetectedCluster().load()
                detected_cluster_ver = \
                    detected_cluster.sds_pkg_version.split('.')
                maj_ver = detected_cluster_ver[0]
                min_ver = detected_cluster_ver[1]
                reqd_ceph_ver = NS.compiled_definitions.get_parsed_defs()[
                    'namespace.tendrl']['min_reqd_ceph_ver']
                req_maj_ver, req_min_ver, req_rel = \
                    reqd_ceph_ver.split('.')
                Event(
                    Message(
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Check: Minimum required version "
                                       "(%s.%s.%s) of Ceph Storage" %
                                       (req_maj_ver, req_min_ver, req_rel)
                        }))
                if int(maj_ver) < int(req_maj_ver) or \
                        int(min_ver) < int(req_min_ver):
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="error",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Error: Minimum required "
                                           "version (%s.%s.%s) doesn't "
                                           "match that of detected Ceph "
                                           "Storage (%s.%s.%s)" %
                                           (req_maj_ver, req_min_ver,
                                            req_rel, maj_ver, min_ver, 0)
                            }))
                    raise FlowExecutionFailedError(
                        "Detected ceph version: %s"
                        " is less than required version: %s" %
                        (detected_cluster.sds_pkg_version,
                         reqd_ceph_ver))
                import_ceph(self.parameters)
        else:
            # Check if the minimum required version of the underlying
            # gluster cluster is met; if not, fail the import task
            detected_cluster = NS.tendrl.objects.DetectedCluster().load()
            detected_cluster_ver = \
                detected_cluster.sds_pkg_version.split('.')
            maj_ver = detected_cluster_ver[0]
            min_ver = detected_cluster_ver[1]
            reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs()[
                'namespace.tendrl']['min_reqd_gluster_ver']
            req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
            Event(
                Message(
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Check: Minimum required version "
                                   "(%s.%s.%s) of Gluster Storage" %
                                   (req_maj_ver, req_min_ver, req_rel)
                    }))
            if int(maj_ver) < int(req_maj_ver) or \
                    int(min_ver) < int(req_min_ver):
                Event(
                    Message(
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="error",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Error: Minimum required version "
                                       "(%s.%s.%s) doesn't match that of "
                                       "detected Gluster Storage "
                                       "(%s.%s.%s)" %
                                       (req_maj_ver, req_min_ver, req_rel,
                                        maj_ver, min_ver, 0)
                        }))
                raise FlowExecutionFailedError(
                    "Detected gluster version: %s"
                    " is less than required version: %s" %
                    (detected_cluster.sds_pkg_version,
                     reqd_gluster_ver))
            import_gluster(self.parameters)

        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Waiting for participant nodes %s to "
                                   "be imported %s" %
                                   (node_list, integration_id)
                    }))

        # An import is successful once all Node[] register to
        # /clusters/:integration_id/nodes/:node_id
        while True:
            _all_node_status = []
            gevent.sleep(3)
            for node_id in self.parameters['Node[]']:
                _status = NS.tendrl.objects.ClusterNodeContext(
                    node_id=node_id).exists() and \
                    NS.tendrl.objects.ClusterTendrlContext(
                        integration_id=integration_id
                    ).exists()
                _all_node_status.append(_status)
            if _all_node_status:
                if all(_all_node_status):
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Import Cluster completed for "
                                           "all nodes in cluster %s" %
                                           integration_id
                            }))
                    break

        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Successfully imported cluster %s" %
                                   integration_id
                    }))
    except Exception as ex:
        # For traceback
        Event(
            ExceptionMessage(priority="error",
                             publisher=NS.publisher_id,
                             payload={
                                 "message": ex.message,
                                 "exception": ex
                             }))
        # Raising exception to mark job as failed
        raise ex
    finally:
        # Release lock
        create_cluster_utils.release_node_lock(self.parameters)
    return True
def run(self):
    integration_id = self.parameters['TendrlContext.integration_id']

    # Wait till DetectedCluster is populated for the nodes
    Event(
        Message(
            job_id=self.parameters['job_id'],
            flow_id=self.parameters['flow_id'],
            priority="info",
            publisher=NS.publisher_id,
            payload={
                "message": "SDS install and config completed, "
                           "waiting for tendrl-node-agent to "
                           "detect newly installed sds details %s %s" % (
                               integration_id,
                               self.parameters['Node[]']
                           )
            }
        )
    )
    while True:
        time.sleep(3)
        all_status = []
        for node in self.parameters['Node[]']:
            try:
                NS._int.client.read(
                    "/nodes/%s/DetectedCluster/detected_cluster_id" % node
                )
                all_status.append(True)
            except etcd.EtcdKeyNotFound:
                all_status.append(False)
        if all_status:
            if all(all_status):
                break

    # Create the params list for the import cluster flow
    new_params = dict()
    new_params['Node[]'] = self.parameters['Node[]']
    new_params['TendrlContext.integration_id'] = integration_id

    # Get the detected cluster details from one of the nodes in the list
    detected_cluster_id = NS._int.client.read(
        "nodes/%s/DetectedCluster/detected_cluster_id" %
        self.parameters['Node[]'][0]
    ).value
    sds_pkg_name = NS._int.client.read(
        "nodes/%s/DetectedCluster/sds_pkg_name" %
        self.parameters['Node[]'][0]
    ).value
    sds_pkg_version = NS._int.client.read(
        "nodes/%s/DetectedCluster/sds_pkg_version" %
        self.parameters['Node[]'][0]
    ).value
    new_params['DetectedCluster.sds_pkg_name'] = sds_pkg_name
    new_params['DetectedCluster.sds_pkg_version'] = sds_pkg_version
    new_params['import_after_create'] = True

    payload = {"tags": ["detected_cluster/%s" % detected_cluster_id],
               "run": "tendrl.flows.ImportCluster",
               "status": "new",
               "parameters": new_params,
               "parent": self.parameters['job_id'],
               "type": "node"
               }
    _job_id = str(uuid.uuid4())
    Job(job_id=_job_id,
        status="new",
        payload=payload).save()
    Event(
        Message(
            job_id=self.parameters['job_id'],
            flow_id=self.parameters['flow_id'],
            priority="info",
            publisher=NS.publisher_id,
            payload={"message": "Please wait while Tendrl imports the "
                                "newly created %s SDS Cluster %s."
                                " Import job id: %s" %
                                (sds_pkg_name, integration_id, _job_id)}
        )
    )
    return True
def run(self):
    try:
        integration_id = self.parameters['TendrlContext.integration_id']
        # Lock nodes
        create_cluster_utils.acquire_node_lock(self.parameters)
        NS.tendrl_context = NS.tendrl_context.load()

        # TODO(team): when Tendrl supports create/expand/shrink cluster,
        # set up passwordless ssh for all gluster nodes with the given
        # integration_id (check
        # /indexes/tags/tendrl/integration/$integration_id for the list
        # of nodes in the cluster)

        node_list = self.parameters['Node[]']
        cluster_nodes = []
        if len(node_list) > 1:
            # This is the master node for this flow
            for node in node_list:
                if NS.node_context.node_id != node:
                    new_params = self.parameters.copy()
                    new_params['Node[]'] = [node]
                    # Create the same flow for each node in the node
                    # list except this one
                    payload = {"tags": ["tendrl/node_%s" % node],
                               "run": "tendrl.flows.ImportCluster",
                               "status": "new",
                               "parameters": new_params,
                               "parent": self.parameters['job_id'],
                               "type": "node"
                               }
                    _job_id = str(uuid.uuid4())
                    cluster_nodes.append(_job_id)
                    Job(job_id=_job_id,
                        status="new",
                        payload=payload).save()
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Importing (job: %s) Node %s "
                                           "to cluster %s" %
                                           (_job_id, node, integration_id)
                            }
                        )
                    )

        # Check if the minimum required version of the underlying gluster
        # cluster is met; if not, fail the import task
        cluster_ver = NS.tendrl_context.sds_version.split('.')
        maj_ver = cluster_ver[0]
        min_ver = re.findall(r'\d+', cluster_ver[1])[0]
        reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs()[
            'namespace.tendrl'
        ]['min_reqd_gluster_ver']
        req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
        Event(
            Message(
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message": "Check: Minimum required version ("
                               "%s.%s.%s) of Gluster Storage" %
                               (req_maj_ver, req_min_ver, req_rel)
                }
            )
        )
        ver_check_failed = False
        if int(maj_ver) < int(req_maj_ver):
            ver_check_failed = True
        else:
            if int(maj_ver) == int(req_maj_ver) and \
                    int(min_ver) < int(req_min_ver):
                ver_check_failed = True

        if ver_check_failed:
            Event(
                Message(
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Error: Minimum required version "
                                   "(%s.%s.%s) doesn't match that of "
                                   "detected Gluster Storage (%s.%s.%s)" %
                                   (req_maj_ver, req_min_ver, req_rel,
                                    maj_ver, min_ver, 0)
                    }
                )
            )
            raise FlowExecutionFailedError(
                "Detected gluster version: %s"
                " is less than required version: %s" % (
                    NS.tendrl_context.sds_version,
                    reqd_gluster_ver
                )
            )

        if not import_gluster(self.parameters):
            return False

        if len(node_list) > 1:
            Event(
                Message(
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Waiting for participant nodes %s to "
                                   "be imported %s" %
                                   (node_list, integration_id)
                    }
                )
            )
            loop_count = 0
            # Wait for (number of nodes) * 6 minutes for the import
            # to complete
            wait_count = (len(node_list) - 1) * 36
            while True:
                parent_job = Job(job_id=self.parameters['job_id']).load()
                if loop_count >= wait_count:
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Import jobs not yet complete "
                                           "on all nodes. Timing out. "
                                           "(%s, %s)" %
                                           (str(node_list), integration_id)
                            }
                        )
                    )
                    return False
                time.sleep(10)
                finished = True
                for child_job_id in parent_job.children:
                    child_job = Job(job_id=child_job_id).load()
                    if child_job.status != "finished":
                        finished = False
                        break
                if finished:
                    break
                else:
                    loop_count += 1
                    continue
    except Exception as ex:
        # For traceback
        Event(
            ExceptionMessage(
                priority="error",
                publisher=NS.publisher_id,
                payload={
                    "message": ex.message,
                    "exception": ex
                }
            )
        )
        # Raising exception to mark job as failed
        raise ex
    finally:
        # Release lock
        create_cluster_utils.release_node_lock(self.parameters)
    return True
def load_job_finished(*args):
    return Job(job_id="uuid", status='finished')
def run(self):
    try:
        # Lock nodes
        create_cluster_utils.acquire_node_lock(self.parameters)
        integration_id = self.parameters['TendrlContext.integration_id']
        if integration_id is None:
            raise FlowExecutionFailedError(
                "TendrlContext.integration_id cannot be empty")

        supported_sds = NS.compiled_definitions.get_parsed_defs()[
            'namespace.tendrl']['supported_sds']
        sds_name = self.parameters["TendrlContext.sds_name"]
        if sds_name not in supported_sds:
            raise FlowExecutionFailedError("SDS (%s) not supported" %
                                           sds_name)

        ssh_job_ids = []
        if "ceph" in sds_name:
            ssh_job_ids = create_cluster_utils.ceph_create_ssh_setup_jobs(
                self.parameters)
        else:
            ssh_job_ids = \
                create_cluster_utils.gluster_create_ssh_setup_jobs(
                    self.parameters,
                    skip_current_node=True
                )

        while True:
            time.sleep(3)
            all_status = {}
            for job_id in ssh_job_ids:
                all_status[job_id] = NS._int.client.read(
                    "/queue/%s/status" % job_id).value

            _failed = {_jid: status for _jid, status in
                       all_status.iteritems() if status == "failed"}
            if _failed:
                raise FlowExecutionFailedError(
                    "SSH setup failed for jobs %s cluster %s" %
                    (str(_failed), integration_id))
            if all([status == "finished" for status in
                    all_status.values()]):
                Event(
                    Message(job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "SSH setup completed for all "
                                           "nodes in cluster %s" %
                                           integration_id
                            }))
                break

        # SSH setup jobs finished above, now install sds
        # bits and create the cluster
        if "ceph" in sds_name:
            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Expanding ceph cluster %s" %
                                       integration_id
                        }))
            ceph_help.expand_cluster(self.parameters)
        else:
            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Expanding Gluster Storage"
                                       " Cluster %s" % integration_id
                        }))
            gluster_help.expand_gluster(self.parameters)

        Event(
            Message(
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message": "SDS install/config completed on newly "
                               "expanded nodes, please wait while "
                               "tendrl-node-agents detect sds details on "
                               "the newly expanded nodes %s" %
                               self.parameters['Node[]']
                }))

        # Wait till DetectedCluster is populated for the nodes
        while True:
            time.sleep(3)
            all_status = []
            detected_cluster = ""
            different_cluster_id = False
            dc = ""
            for node in self.parameters['Node[]']:
                try:
                    dc = NS._int.client.read(
                        "/nodes/%s/DetectedCluster/detected_cluster_id" %
                        node).value
                    if not detected_cluster:
                        detected_cluster = dc
                    else:
                        if detected_cluster != dc:
                            all_status.append(False)
                            different_cluster_id = True
                            break
                    all_status.append(True)
                except etcd.EtcdKeyNotFound:
                    all_status.append(False)

            if different_cluster_id:
                raise FlowExecutionFailedError(
                    "Seeing different detected cluster id in"
                    " different nodes. %s and %s" %
                    (detected_cluster, dc))

            if all_status:
                if all(all_status):
                    break

        # Create the params list for the import cluster flow
        new_params = dict()
        new_params['Node[]'] = self.parameters['Node[]']
        new_params['TendrlContext.integration_id'] = integration_id

        # Get the detected cluster details from one of the nodes
        sds_pkg_name = NS._int.client.read(
            "nodes/%s/DetectedCluster/"
            "sds_pkg_name" % self.parameters['Node[]'][0]).value
        new_params['import_after_expand'] = True
        sds_pkg_version = NS._int.client.read(
            "nodes/%s/DetectedCluster/sds_pkg_"
            "version" % self.parameters['Node[]'][0]).value
        new_params['DetectedCluster.sds_pkg_name'] = sds_pkg_name
        new_params['DetectedCluster.sds_pkg_version'] = sds_pkg_version

        tags = []
        for node in self.parameters['Node[]']:
            tags.append("tendrl/node_%s" % node)
        payload = {
            "tags": tags,
            "run": "tendrl.flows.ImportCluster",
            "status": "new",
            "parameters": new_params,
            "parent": self.parameters['job_id'],
            "type": "node"
        }
        _job_id = str(uuid.uuid4())
        # Release lock before import cluster
        create_cluster_utils.release_node_lock(self.parameters)
        Job(job_id=_job_id,
            status="new",
            payload=payload).save()
        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Please wait while Tendrl imports ("
                                   "job_id: %s) newly expanded "
                                   "%s storage nodes %s" %
                                   (_job_id, sds_pkg_name, integration_id)
                    }))
    except Exception as ex:
        Event(
            ExceptionMessage(priority="error",
                             publisher=NS.publisher_id,
                             payload={
                                 "message": ex.message,
                                 "exception": ex
                             }))
        # Raising exception to mark job as failed
        raise ex
    finally:
        # Release lock if any exception occurred
        create_cluster_utils.release_node_lock(self.parameters)
def run(self):
    Event(
        Message(
            priority="info",
            publisher=NS.publisher_id,
            payload={
                "message": "Checking if a new pool has to be created "
                           "for rbd creation"
            },
            job_id=self.parameters['job_id'],
            flow_id=self.parameters['flow_id'],
            cluster_id=NS.tendrl_context.integration_id,
        ))
    if not self.parameters.get('Rbd.pool_id'):
        # Check that the mandatory parameters for pool creation
        # are present
        mandatory_pool_params = Set([
            "Rbd.pool_poolname", "Rbd.pool_pg_num", "Rbd.pool_size",
            "Rbd.pool_min_size"
        ])
        missing_params = list(
            mandatory_pool_params.difference(Set(self.parameters.keys())))
        if not missing_params:
            # Map the passed pool parameters onto the required keys
            pool_parameters = {}
            for key, value in self.parameters.items():
                if "Rbd.pool_" in key:
                    pool_parameters[key.replace("Rbd.pool_", "Pool.")] = \
                        value
            payload = {
                "integration_id": NS.tendrl_context.integration_id,
                "run": "ceph.flows.CreatePool",
                "status": "new",
                "parameters": pool_parameters,
                "parent": self.parameters['job_id'],
                "type": "sds",
                "tags": ["tendrl/integration/$TendrlContext."
                         "integration_id"]
            }
            Event(
                Message(
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={"message": "Creating job for pool creation"},
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    cluster_id=NS.tendrl_context.integration_id,
                ))
            _job_id = str(uuid.uuid4())
            Job(job_id=_job_id,
                status="new",
                payload=payload).save()
            Event(
                Message(
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Checking for successful pool creation"
                    },
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    cluster_id=NS.tendrl_context.integration_id,
                ))
            pool_created = False
            job_status = "new"
            while not pool_created:
                try:
                    job_status = NS._int.client.read(
                        "/queue/%s/status" % _job_id).value
                except etcd.EtcdKeyNotFound:
                    Event(
                        Message(
                            priority="error",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Failed to fetch pool "
                                           "creation status for rbd "
                                           "creation"
                            },
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            cluster_id=NS.tendrl_context.integration_id,
                        ))
                    break
                if job_status == "finished":
                    pool_created = True
                elif job_status == "failed":
                    break
            if pool_created:
                # Setting pool_id for rbd creation
                pool_id = self._get_pool_id(
                    self.parameters['Rbd.pool_poolname'])
                if pool_id:
                    self.parameters['Rbd.pool_id'] = pool_id
                else:
                    Event(
                        Message(
                            priority="error",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Failed to fetch pool_id %s. "
                                           "Cannot create rbd without "
                                           "pool_id." % pool_id
                            },
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            cluster_id=NS.tendrl_context.integration_id,
                        ))
                    return False
            else:
                Event(
                    Message(
                        priority="error",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Failed to create pool. "
                                       "Cannot proceed with rbd creation."
                        },
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        cluster_id=NS.tendrl_context.integration_id,
                    ))
                return False
        else:
            Event(
                Message(
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Mandatory parameters %s for pool "
                                   "creation not present. Cannot continue"
                                   " with rbd creation." %
                                   ', '.join(missing_params)
                    },
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    cluster_id=NS.tendrl_context.integration_id,
                ))
            return False

    attrs = dict(name=self.parameters['Rbd.name'],
                 size=str(self.parameters['Rbd.size']),
                 pool_id=self.parameters.get('Rbd.pool_id'))
    Event(
        Message(
            priority="info",
            publisher=NS.publisher_id,
            payload={
                "message": "Creating rbd %s on pool %s" %
                           (self.parameters['Rbd.name'],
                            self.parameters['Rbd.pool_id'])
            },
            job_id=self.parameters['job_id'],
            flow_id=self.parameters['flow_id'],
            cluster_id=NS.tendrl_context.integration_id,
        ))
    crud = Crud()
    resp = crud.create("rbd", attrs)
    try:
        crud.sync_request_status(resp['request'])
    except RequestStateError as ex:
        Event(
            Message(
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message": "Failed to create rbd %s."
                               " Error: %s" %
                               (self.parameters['Rbd.name'], ex)
                },
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
                cluster_id=NS.tendrl_context.integration_id,
            ))
        return False

    Event(
        Message(
            priority="info",
            publisher=NS.publisher_id,
            payload={
                "message": "Successfully created rbd %s on pool %s" %
                           (self.parameters['Rbd.name'],
                            self.parameters['Rbd.pool_id'])
            },
            job_id=self.parameters['job_id'],
            flow_id=self.parameters['flow_id'],
            cluster_id=NS.tendrl_context.integration_id,
        ))

    pool_name = NS._int.client.read(
        "clusters/%s/Pools/%s/pool_name" %
        (NS.tendrl_context.integration_id,
         self.parameters['Rbd.pool_id'])).value
    rbd_details = NS.state_sync_thread._get_rbds(pool_name)
    for k, v in rbd_details.iteritems():
        NS.ceph.objects.Rbd(
            name=k,
            size=v['size'],
            pool_id=self.parameters['Rbd.pool_id'],
            flags=v['flags'],
            provisioned=NS.state_sync_thread._to_bytes(v['provisioned']),
            used=NS.state_sync_thread._to_bytes(v['used'])).save()
    return True
def sync(sync_ttl=None):
    try:
        tags = []
        # Update node agent service details
        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "node_sync, updating Service data"
                    }))
        for service in TENDRL_SERVICES:
            s = NS.tendrl.objects.Service(service=service)
            if s.running:
                service_tag = NS.compiled_definitions.get_parsed_defs()[
                    'namespace.tendrl']['tags'][service.strip("@*")]
                tags.append(service_tag)
                if service_tag == "tendrl/server":
                    tags.append("tendrl/monitor")
            s.save()

        _cluster = NS.tendrl.objects.Cluster(
            integration_id=NS.tendrl_context.integration_id).load()
        if _cluster.is_managed == "yes":
            # Try to claim the orphan "provisioner/<integration_id>" tag
            _tag = "provisioner/%s" % _cluster.integration_id
            _is_new_provisioner = False
            NS.node_context = NS.tendrl.objects.NodeContext().load()
            if _tag not in NS.node_context.tags:
                try:
                    _index_key = "/indexes/tags/%s" % _tag
                    _node_id = json.dumps([NS.node_context.node_id])
                    NS._int.wclient.write(_index_key, _node_id,
                                          prevExist=False)
                    etcd_utils.refresh(_index_key, sync_ttl)
                    tags.append(_tag)
                    _is_new_provisioner = True
                except etcd.EtcdAlreadyExist:
                    pass

        # Update node context with the latest tags
        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "node_sync, updating node context "
                                   "data with tags"
                    }))
        NS.node_context = NS.tendrl.objects.NodeContext().load()
        current_tags = list(NS.node_context.tags)
        tags += current_tags
        NS.node_context.tags = list(set(tags))
        NS.node_context.tags.sort()
        current_tags.sort()
        if NS.node_context.tags != current_tags:
            NS.node_context.save()

        if _cluster.is_managed == "yes":
            if _is_new_provisioner:
                _msg = "node_sync, NEW provisioner node found! " \
                       "re-configuring monitoring (job-id: %s) " \
                       "on this node"
                payload = {
                    "tags": ["tendrl/node_%s" % NS.node_context.node_id],
                    "run": "tendrl.flows.ConfigureMonitoring",
                    "status": "new",
                    "parameters": {
                        'TendrlContext.integration_id':
                            NS.tendrl_context.integration_id
                    },
                    "type": "node"
                }
                _job_id = str(uuid.uuid4())
                Job(job_id=_job_id,
                    status="new",
                    payload=payload).save()
                Event(
                    Message(priority="debug",
                            publisher=NS.publisher_id,
                            payload={"message": _msg % _job_id}))

        # Update /indexes/tags/:tag = [node_ids]
        for tag in NS.node_context.tags:
            index_key = "/indexes/tags/%s" % tag
            _node_ids = []
            try:
                _node_ids = NS._int.client.read(index_key).value
                _node_ids = json.loads(_node_ids)
            except etcd.EtcdKeyNotFound:
                pass
            if _node_ids:
                if "provisioner" in tag:
                    # Check if this is a stale provisioner
                    if NS.node_context.node_id != _node_ids[0]:
                        NS.node_context.tags.remove(tag)
                        NS.node_context.save()
                        continue
                if NS.node_context.node_id in _node_ids:
                    continue
                else:
                    _node_ids += [NS.node_context.node_id]
            else:
                _node_ids = [NS.node_context.node_id]
            _node_ids = list(set(_node_ids))
            etcd_utils.write(index_key, json.dumps(_node_ids))
            if sync_ttl and len(_node_ids) == 1:
                etcd_utils.refresh(index_key, sync_ttl)

        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "node_sync, updating detected platform"
                    }))
    except Exception as ex:
        Event(
            ExceptionMessage(priority="error",
                             publisher=NS.publisher_id,
                             payload={
                                 "message": "node_sync service and indexes "
                                            "sync failed: " + ex.message,
                                 "exception": ex
                             }))
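# Illustrative sketch (made-up ids) of the /indexes/tags layout that
# sync() maintains above: each tag key stores a JSON list of node ids,
# and the first entry of a provisioner tag is treated as the
# authoritative owner.
#
# /indexes/tags/tendrl/monitor           -> '["node-id-1"]'
# /indexes/tags/provisioner/<int-id>     -> '["node-id-2"]'
# /indexes/tags/tendrl/node_<node-id-1>  -> '["node-id-1"]'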