def exists_group(self, name: str) -> Result:
    """
    Check whether a SLURM account with the given name is defined

    Parameters
    ----------
    name : str
        Account name passed to ``sacctmgr list account``

    Returns
    ---------
    Result
        Result.status is 0 when command output was obtained, 1 otherwise
        Result.response is True when the output has exactly one line, False otherwise
    """
    self._logger.info("exists_account {0}".format(name))

    query = self._sacctmgr + " -n list account name=" + name
    output = self.__run_cmd2__(query)

    # No output object at all is reported as an error
    if output is None:
        return Result(1, False)

    # Exactly one output line is treated as "the account exists"
    return Result(0, len(output.splitlines()) == 1)
def add_user(self, user_atts: dict) -> Result:
    """
    Create a SLURM user unless one with the same name already exists.
    An existing user is left untouched (it is not overwritten).

    Parameters
    ----------
    user_atts: dict
        Properties defining a SLURM user

    Returns
    ---------
    Result
        Result.status is 0 for success (including "already exists"), >0 on error

    See Also
    --------
    update_user
    """
    self._logger.info("add_user {0}".format(user_atts))
    new_user = SlurmUser(user_atts)

    # Nothing to do when the user is already known
    lookup = self.exists_user(new_user.name())
    if lookup.is_error():
        return Result(1, "Error testing user exists")
    if lookup.response is True:
        return Result(0, "User already exists")

    # The user object is formatted directly into the create command
    create_cmd = self._sacctmgr + " -i create user {0}".format(new_user)
    return Result(self.__run_cmd__(create_cmd), None)
def get_members(self, name: str) -> Result:
    """
    Retrieve the user names of all users that are associated with the account

    Parameters
    ----------
    name : str
        SLURM account name

    Returns
    ---------
    Result
        Result.status is 0 for success, >0 on error
        Result.response is a list of user names associated with the SLURM account.
        If the operation fails, an empty list is returned
    """
    self._logger.info("get_members {0}".format(name))
    cmd = self._sacctmgr + " -P list account WithAssoc name='" + name + "' format='User'"
    stdout = self.__run_cmd2__(cmd)

    if stdout is None:
        self._logger.error("Command %s requested STDOUT but returned None", cmd)
        return Result(1, [])

    # The first line is skipped as the header; blank lines are dropped.
    # Compare by value: identity checks against a string literal ("is not ''")
    # depend on interpreter string interning and raise a SyntaxWarning.
    members = [line for line in stdout.splitlines()[1:] if line != ""]
    self._logger.debug(members)
    return Result(0, members)
def exists_user(self, name: str) -> Result:
    """
    Check whether a SLURM user with the given name is defined

    Parameters
    ----------
    name : str
        SLURM user name

    Returns
    ---------
    Result
        Result.status is 0 when command output was obtained, 1 otherwise
        Result.response is True when the output has exactly one line, False when it
        does not, and the string "Command Error" when no output was obtained
    """
    self._logger.info("exists_user {0}".format(name))

    query = self._sacctmgr + " -n list user name=" + name
    output = self.__run_cmd2__(query)

    # No output object at all is reported as an error
    if output is None:
        return Result(1, "Command Error")

    # Exactly one output line is treated as "the user exists"
    if len(output.splitlines()) == 1:
        self._logger.debug("User %s does exist", name)
        return Result(0, True)

    self._logger.debug("User %s does NOT exist", name)
    return Result(0, False)
def list_groups(self):
    """
    Retrieve the names of all SLURM accounts in the database

    Returns
    ---------
    Result
        Result.status is 0 for success, >0 on error
        Result.response is a list of account names (first '|'-delimited field of
        each output row); empty if the command produced no output object
    """
    self._logger.info("list_groups")
    cmd = self._sacctmgr + " -P list account"
    stdout = self.__run_cmd2__(cmd)

    if stdout is None:
        self._logger.error("Command %s requested STDOUT but returned None", cmd)
        return Result(1, [])

    # The first line is skipped as the header; blank lines are dropped.
    # Compare by value: "is not ''" tests object identity, which only works
    # by accident of string interning and raises a SyntaxWarning.
    accounts = [
        line.split("|")[0]
        for line in stdout.splitlines()[1:]
        if line != ""
    ]
    self._logger.debug(accounts)
    return Result(0, accounts)
def delete_user(self, name: str) -> Result:
    """
    Remove a user from SLURM, unless delete operations are blocked.

    Parameters
    ----------
    name : str
        SLURM user name

    Returns
    ---------
    Result
        Result.status is 0 for success (including when the user does not exist),
        >0 on error or when deletes are disabled
    """
    self._logger.info("delete_user {0}".format(name))

    # Deletes can be switched off for this manager
    if self._block_delete:
        return Result(
            1, "Delete operations on users and groups are disabled")

    lookup = self.exists_user(name)
    if lookup.is_error():
        return Result(1, "Error testing user exists")
    if lookup.response is False:
        return Result(0, "User does not exist")

    delete_cmd = self._sacctmgr + " -i delete user name=" + name
    return Result(self.__run_cmd__(delete_cmd), None)
def get_group(self, name: str) -> Result:
    """
    Look up the current configuration of a SLURM account

    Parameters
    ----------
    name : str
        SLURM account name

    Returns
    ---------
    Result
        Result.status is 0 for success, >0 on error
        Result.response is a dictionary mapping the output header fields to the values
        of the first row whose second column (User) is empty, plus a "memberUid" list
        of member user names. If the operation fails, an empty dictionary is returned.
    """
    self._logger.info("get_account {0}".format(name))

    # A SlurmAccount is built only to obtain the format string for the query
    attributes = {"name": name}
    attributes.update(self._schema)
    schema_account = SlurmAccount(attributes, self._schema)
    cmd = self._sacctmgr + " -P list account WithAssoc name='" + name + "' format='Account,User," + schema_account.format_string(
    ) + "'"
    stdout = self.__run_cmd2__(cmd)

    account = {}
    if stdout is None:
        self._logger.error("Command %s requested STDOUT but returned None", cmd)
        return Result(1, account)

    rows = stdout.splitlines()
    if len(rows) < 2:
        return Result(1, account)

    header_tokens = rows[0].split('|')
    for row in rows[1:]:
        data_tokens = row.split('|')
        # Only the first row with an empty User column is used
        if data_tokens[1] != "":
            continue
        for position, key in enumerate(header_tokens):
            account[key] = data_tokens[position]
        break

    if not account:
        return Result(1, account)

    # Add group members to returned object; stays empty if the lookup fails
    account["memberUid"] = []
    member_result = self.get_members(name)
    if member_result.is_success():
        account["memberUid"] = member_result.response

    self._logger.debug(account)
    return Result(0, account)
def send(self, obj: object) -> Result:
    """
    Publish a message to server

    The message is serialized as YAML and published to ``self.queue``. Publishing is
    attempted up to 9 times; after each failure the method sleeps
    ``(attempt - 1) * 5`` seconds and reconnects if the connection is closed.

    Parameters
    ----------
    obj: dict
        The message to send

    Returns
    ----------
    Result
        Result.status is 0 for success, >0 on error (453 for a serialization
        failure, 470 when the message could not be delivered)
        Result.response is None on success, and informational message on error
    """
    # Encode the request dict as YAML
    try:
        message = yaml.safe_dump(obj)
    except yaml.YAMLError as exc:
        self._logger.error("YAML serialization error: %s", exc)
        self._logger.error("{0}".format(obj))
        return Result(453, "Could not serialize the message as YAML")

    self._logger.debug("Sending %s:%s", self.queue, message.rstrip())

    # Send message to server, retrying on any publish failure
    not_sent = True
    attempts = 1
    while not_sent and attempts < 10:
        try:
            self.channel.basic_publish(exchange='',
                                       routing_key=self.queue,
                                       body=message,
                                       properties=pika.BasicProperties(
                                           delivery_mode=2,  # Indicates message should be persisted on disk
                                       ),
                                       mandatory=True)
            not_sent = False
        except Exception as e:
            # Record why the attempt failed; previously the cause was dropped
            # whenever the connection was still open
            self._logger.error("Failed to send on attempt %d: %s", attempts, e)
            time.sleep((attempts - 1) * 5)
            if self.connection.is_closed:
                self._logger.error("Failed to send on attempt %d because connection closed. Reconnecting...", attempts)
                self.connect()
            attempts = attempts + 1

    if not_sent:
        self._logger.error("Failed to deliver message %s:%s", self.queue, message.rstrip())
        return Result(470, "Message could not be delivered")

    self._logger.debug("Success")
    return Result(0, None)
def error(self, action: str, params: object, result: Result) -> bool:
    """
    Insert an ERROR entry describing a failed operation

    Parameters
    ----------
    action : str
        A unique identifier for the action, usually ${method_name}:${class name}
    params : object
        The arguments to the method as scalar, list or dict; stored as YAML
    result: metaroot.api.Result
        The Result of the failed operation; its status and its YAML-encoded
        transport format are stored with the entry

    Returns
    ---------
    bool
        The value returned by the underlying insert (True for success)

    Raises
    ---------
    Exception
        If an underlying operation raised an exception
    """
    timestamp = datetime.datetime.now()
    entry = (
        timestamp,
        ActivityStream.ERROR,
        action,
        yaml.safe_dump(params),
        result.status,
        yaml.safe_dump(result.to_transport_format()),
    )
    return self._insert(entry)
def disassociate_users_from_group(self, user_names: list, group_name: str) -> Result:
    """
    Revoke the membership of several users in one account

    Parameters
    ----------
    user_names : list
        SLURM user names
    group_name : str
        SLURM account name

    Returns
    ---------
    Result
        Result.status is the sum of the individual operation statuses
        (0 when all succeeded, >0 otherwise)
        Result.response is a list of the user names whose individual operation
        returned a truthy response (i.e. users that were moved to "bench")
    """
    self._logger.info("disassociate_users_from_account {0}, {1}".format(
        user_names, group_name))

    total_status = 0
    benched_users = []
    for member in user_names:
        outcome = self.disassociate_user_from_group(member, group_name)
        total_status += outcome.status
        if outcome.response:
            benched_users.append(member)

    return Result(total_status, benched_users)
def associate_user_to_group(self, user_name: str, group_name: str) -> Result:
    """
    Grant a user membership in an account

    Parameters
    ----------
    user_name : str
        SLURM user name
    group_name : str
        SLURM account name, optionally prefixed with cluster name delimited by ':'

    Returns
    ---------
    Result
        Result.status is 0 for success, >0 on error
    """
    self._logger.info("associate_user_to_account {0} {1}".format(
        user_name, group_name))

    cluster = ""
    if ":" in group_name:
        # "<cluster>:<account>" - only the first two ':'-separated parts are used
        cluster_name, group_name = group_name.split(":")[:2]
        cluster = " Cluster='" + cluster_name + "'"

    cmd = self._sacctmgr + " -i add user name='" + user_name + "' account='" + group_name + "'" + cluster
    return Result(self.__run_cmd__(cmd), None)
def occur_in_response_to(clazz: str, action: str, payload: object, result: Result, n_priors: int) -> int:
    """
    Send a notification e-mail when an operation result is an error

    Parameters
    ----------
    clazz: str
        The class name that implemented the method handling the action
    action: str
        The name of the method implemented by clazz that was called
    payload: object
        The arguments that were passed to the method
    result: Result
        The result of the method call
    n_priors: int
        The number of reactions that have already occurred during the requested
        operation (not used by this implementation)

    Returns
    ---------
    int
        Always 0
    """
    global config

    # Successful results need no reaction
    if not result.is_error():
        return 0

    recipient = config.get("REACTION_NOTIFY")
    report = "".join([
        "<table>",
        "<tr><td>Class</td><td>", clazz, "</td></tr>",
        "<tr><td>Action</td><td>", action, "</td></tr>",
        "<tr><td>Payload</td><td>", str(payload), "</td></tr>",
        "<tr><td>Result Status</td><td>", str(result.status), "</td></tr>",
        "<tr><td>Result Payload</td><td>", str(result.response), "</td></tr>",
        "</table>",
    ])
    send_email(recipient, "metaroot operation failed", report)
    return 0
def set_user_default_group(self, user_name: str, group_name: str) -> Result:
    """
    Change the default account of a user

    Parameters
    ----------
    user_name : str
        SLURM user name
    group_name : str
        SLURM account name, optionally prefixed with cluster name delimited by ':'

    Returns
    ---------
    Result
        Result.status is 0 for success, >0 on error
    """
    self._logger.info("set_user_default_account {0}, {1}".format(
        user_name, group_name))

    cluster = ""
    if ":" in group_name:
        # "<cluster>:<account>" - only the first two ':'-separated parts are used
        cluster_name, group_name = group_name.split(":")[:2]
        cluster = " Cluster='" + cluster_name + "'"

    cmd = self._sacctmgr + " -i modify user where name=" + user_name + " " + cluster + " set defaultaccount=" + group_name
    return Result(self.__run_cmd__(cmd), None)
def record(self, action: str, params: object, result: Result) -> bool:
    """
    Store an entry for an operation: as info when result.is_success(), as error otherwise

    Parameters
    ----------
    action : str
        A unique identifier for the action, usually ${method_name}:${class name}
    params : object
        The arguments to the method as scalar, list or dict
    result: metaroot.api.Result
        The Result of the operation

    Returns
    ---------
    bool
        The value returned by ``info`` or ``error`` (True for success)

    Raises
    ---------
    Exception
        If an underlying operation raised an exception
    """
    # Failures carry the Result along so its details can be stored
    if not result.is_success():
        return self.error(action, params, result)
    return self.info(action, params)
def echo(self, message: str) -> Result:
    """
    Verify that messages arrive in order and advance the module-level counter

    Parameters
    ----------
    message : str
        Expected to equal "hello <sequence>" for the current value of the
        module-level ``sequence``

    Returns
    ---------
    Result
        Result(0, None) when the message matched

    Raises
    ---------
    Exception
        If the message does not match the expected text
    """
    global sequence

    expected = "hello {0}".format(sequence)
    if message != expected:
        raise Exception("Expecting 'hello {0}' but consumed '{1}'".format(
            sequence, message))

    sequence += 1
    return Result(0, None)
def add_group(self, group_atts: dict) -> Result:
    """
    Create a SLURM account and then apply its attributes

    Parameters
    ----------
    group_atts : dict
        Properties defining the SLURM account

    Returns
    ---------
    Result
        Result.status is 0 for success (including when the account already exists),
        >0 if the operation failed
    """
    self._logger.info("add_account {0}".format(group_atts))
    new_account = SlurmAccount(group_atts, self._schema)

    lookup = self.exists_group(new_account.name())
    if lookup.is_error():
        return Result(1, "Error checking for existence of group")
    if lookup.response is True:
        return Result(0, "Group already exists")

    # Step 1: create the bare account
    create_cmd = self._sacctmgr + " -i -Q add account name={0}".format(
        new_account.name())
    # add cluster if one has been defined. not specifying a cluster creates associations for all clusters
    if new_account.cluster() is not None:
        create_cmd += " cluster={0}".format(new_account.cluster())

    create_status = self.__run_cmd__(create_cmd)
    if create_status > 0:
        return Result(create_status, None)

    # Step 2: set the account's attributes with a modify command
    modify_cmd = self._sacctmgr + " -i modify account {0}".format(
        new_account.as_update_str())
    return Result(self.__run_cmd__(modify_cmd), None)
def list_users(self, with_default_group: str):
    """
    Retrieve the names of all SLURM users in the database

    Parameters
    ----------
    with_default_group: str
        Either the string "any" meaning any group, or a string id of a group that will restrict the
        result to only users with the specified group set as their default

    Returns
    ---------
    Result
        Result.status is 0 for success, >0 on error
        Result.response is a list of user names (first '|'-delimited field of
        each output row); empty if the command produced no output object
    """
    self._logger.info("list_users")
    if with_default_group == "any":
        cmd = self._sacctmgr + " -P list user"
    else:
        cmd = self._sacctmgr + " -P list user where defaultaccount=\"" + with_default_group + "\""
    stdout = self.__run_cmd2__(cmd)

    if stdout is None:
        self._logger.error("Command %s requested STDOUT but returned None", cmd)
        return Result(1, [])

    # The first line is skipped as the header; blank lines are dropped.
    # Compare by value: "is not ''" tests object identity, which only works
    # by accident of string interning and raises a SyntaxWarning.
    users = [
        line.split("|")[0]
        for line in stdout.splitlines()[1:]
        if line != ""
    ]
    self._logger.debug(users)
    return Result(0, users)
def disassociate_user_from_group(self, user_name: str, group_name: str) -> Result:
    """
    Remove a user's association with an account (revoke membership)

    If the account is the user's current default, the user is first associated with
    the reserve account 'bench' (prefixed by ``self._cluster``) and has it set as
    default. The delete command is issued in every case.

    Parameters
    ----------
    user_name : str
        SLURM user name
    group_name : str
        SLURM account name

    Returns
    ---------
    Result
        Result.status is 0 for success, >0 on error
        Result.response is True if user had their default group set to "bench", False otherwise
    """
    self._logger.info("disassociate_user_from_account {0}, {1}".format(
        user_name, group_name))
    result = self.get_user(user_name)
    benched = False

    # Check if non-existent user name specified
    # (Logger.warning is used throughout; Logger.warn is a deprecated alias)
    if 'default' not in result.response:
        self._logger.warning(
            "attempt to disassociate user %s that does not exist",
            user_name)

    # If we are trying to remove the primary group affiliation of the user, set their primary affiliation to
    # the special reserve group 'bench', in which case the user will need to select a new default account for
    # themself
    elif result.response['default'] == group_name:
        # This can fail if the user already has an association to the bench account
        self.associate_user_to_group(user_name, self._cluster + 'bench')

        # Move the user to the bench account
        benched = self.set_user_default_group(user_name, self._cluster + 'bench').is_success()
        if benched:
            self._logger.warning(
                "disassociate_user_from_account {0}, {1} -> User was benched"
                .format(user_name, group_name))

    # Remove the user affiliation
    cmd = self._sacctmgr + " -i delete user name='" + user_name + "' account='" + group_name + "'"
    status = self.__run_cmd__(cmd)
    return Result(status, benched)
def update_group(self, group_atts: dict) -> Result:
    """
    Apply new attribute values to an existing SLURM account

    Parameters
    ----------
    group_atts: dict
        Properties defining a SLURM account to update. The name of the argument account must match
        the name of the account to update in the SLURM database.

    Returns
    ---------
    Result
        Result.status is 0 for success, >0 on error
    """
    self._logger.info("update_account {0}".format(group_atts))

    target = SlurmAccount(group_atts, self._schema)
    modify_cmd = self._sacctmgr + " -i modify account {0}".format(
        target.as_update_str())
    return Result(self.__run_cmd__(modify_cmd), None)
def delete_group(self, name: str) -> Result:
    """
    Remove an account from SLURM. All members are disassociated from the account
    (which migrates their default accounts away from it) before the account itself
    is deleted.

    Parameters
    ----------
    name : str
        SLURM account name

    Returns
    ---------
    Result
        Result.status is 0 for success (including when the account does not exist),
        >0 on error or when deletes are disabled
    """
    self._logger.info("delete_account {0}".format(name))

    # Deletes can be switched off for this manager
    if self._block_delete:
        return Result(
            1, "Delete operations on users and groups are disabled")

    # A missing group is a success with an informational message
    lookup = self.exists_group(name)
    if lookup.is_error():
        return Result(1, None)
    if lookup.response is False:
        return Result(0, "Group does not exist")

    # Current account members
    members = self.get_members(name)
    if members.is_error():
        return Result(1, None)

    # Members must go first: their primary account affiliation may have to be
    # changed before the account can be removed
    removal = self.disassociate_users_from_group(members.response, name)
    if removal.is_error():
        return Result(2, None)

    # Finally remove the account itself
    delete_cmd = self._sacctmgr + " -i delete account name=" + name
    return Result(self.__run_cmd__(delete_cmd), None)
def update_user(self, user_atts: dict) -> Result:
    """
    Apply new attribute values to an existing SLURM user. The argument user object must
    contain an "Account" attribute, as this method is only applicable to changing user
    parameters associated with an account.

    Parameters
    ----------
    user_atts : dict
        Properties defining a SLURM user. The name of the argument user must match the name of
        the user to update in the SLURM database.

    Returns
    ---------
    Result
        Result.status is 0 for success, >0 on error
    """
    self._logger.info("update_user {0}".format(user_atts))

    target = SlurmUser(user_atts)
    modify_cmd = self._sacctmgr + " -i modify user {0}".format(
        target.as_update_str())
    return Result(self.__run_cmd__(modify_cmd), None)
def exists_group(self, name: str):
    """
    Return a successful Result tagged with this method and this object's name

    Parameters
    ----------
    name : str
        Group name (not used)

    Returns
    ---------
    Result
        Status 0 with the response "exists_group:" followed by ``self.name``
    """
    tag = "exists_group:" + self.name
    return Result(0, tag)
def add_group(self, group_atts: dict) -> Result:
    """
    Return a successful Result that echoes the supplied attributes

    Parameters
    ----------
    group_atts : dict
        Group attributes, returned unchanged as the response

    Returns
    ---------
    Result
        Status 0 with ``group_atts`` as the response
    """
    echoed = group_atts
    return Result(0, echoed)
def send(self, obj: object) -> Result:
    """
    Method to initiate an RPC request

    The request is serialized as YAML and published with a fresh correlation id and
    ``self.callback_queue`` as the reply queue. Publishing is attempted up to 9
    times. The method then polls the connection (35 polls with a 5 second time
    limit each) until ``self.response`` has been set.

    Parameters
    ----------
    obj: object
        A dictionary specifying a remote method name and arguments to invoke

    Returns
    ----------
    Result
        Result.status is 0 for success, >0 on error (453 serialization failure,
        470 delivery failure, 471 timeout, 454 undecodable response)
        Result.response is any object returned by the remote method invocation or None
    """
    # Encode the request dict as YAML
    try:
        message = yaml.safe_dump(obj)
    except yaml.YAMLError as exc:
        self.logger.error("YAML serialization error: %s", exc)
        self.logger.error("{0}".format(obj))
        return Result(453, None)

    self.response = None
    self.corr_id = str(uuid.uuid4())

    # Send RPC request to server, retrying on any publish failure
    not_sent = True
    attempts = 1
    while not_sent and attempts < 10:
        try:
            self.channel.basic_publish(exchange='',
                                       routing_key=self.queue,
                                       body=message,
                                       properties=pika.BasicProperties(
                                           reply_to=self.callback_queue,
                                           correlation_id=self.corr_id))
            not_sent = False
        except Exception as e:
            # Log the actual cause; the connection is not necessarily closed
            self.logger.info("Failed to send on attempt %d: %s", attempts, e)
            time.sleep((attempts - 1) * 5)
            if self.connection.is_closed:
                self.logger.info("Failed to send on attempt %d because connection closed. Reconnecting...", attempts)
                self.connect()
            attempts = attempts + 1

    if not_sent:
        self.logger.error("Failed to deliver message %s:%s", self.queue, message.rstrip())
        send_email(self.config.get("NOTIFY_ON_ERROR"),
                   "Message delivery failure: " + self.__class__.__name__,
                   "Failed to deliver message {0}:{1}".format(self.queue, message.rstrip()))
        return Result(470, "Message could not be delivered")

    # Wait for response
    attempts = 1
    while self.response is None and attempts < 36:
        self.logger.debug("Waiting for callback response to %s", str(obj))
        # Process events in 5 second slices
        self.connection.process_data_events(time_limit=5)
        attempts = attempts + 1
    self.corr_id = None

    # If timed out waiting for response. Test the response itself rather than
    # the attempt counter: a reply that arrives during the final poll also
    # leaves the counter at its limit and must not be reported as a timeout.
    if self.response is None:
        self.logger.error("Operation timed out waiting for a response to %s:%s", self.queue, message.rstrip())
        send_email(self.config.get("NOTIFY_ON_ERROR"),
                   "RPC timeout failure: " + self.__class__.__name__,
                   "No response received for message {0}:{1}".format(self.queue, message.rstrip()))
        return Result(471, "Operation timed out waiting for a response")

    # Decode the response dict as YAML
    try:
        res_obj = yaml.safe_load(self.response)
        return Result.from_transport_format(res_obj)
    except yaml.YAMLError as exc:
        self.logger.error("YAML serialization error: %s", exc)
        self.logger.error("{0}".format(obj))
        return Result(454, None)
def delete_user(self, name: str):
    """
    Return a successful Result tagged with this method and this object's name

    Parameters
    ----------
    name : str
        User name (not used)

    Returns
    ---------
    Result
        Status 0 with the response "delete_user:" followed by ``self.name``
    """
    tag = "delete_user:" + self.name
    return Result(0, tag)
def get_user(self, name: str):
    """
    Return a successful Result tagged with this method and this object's name

    Parameters
    ----------
    name : str
        User name (not used)

    Returns
    ---------
    Result
        Status 0 with the response "get_user:" followed by ``self.name``
    """
    tag = "get_user:" + self.name
    return Result(0, tag)
def update_user(self, user_atts: dict):
    """
    Return a successful Result tagged with this method and this object's name

    Parameters
    ----------
    user_atts : dict
        User attributes (not used)

    Returns
    ---------
    Result
        Status 0 with the response "update_user:" followed by ``self.name``
    """
    tag = "update_user:" + self.name
    return Result(0, tag)
def add_user(self, user_atts: dict):
    """
    Return a successful Result tagged with this method and this object's name

    Parameters
    ----------
    user_atts : dict
        User attributes (not used)

    Returns
    ---------
    Result
        Status 0 with the response "add_user:" followed by ``self.name``
    """
    tag = "add_user:" + self.name
    return Result(0, tag)
def delete_group(self, name: str):
    """
    Return a successful Result tagged with this method and this object's name

    Parameters
    ----------
    name : str
        Group name (not used)

    Returns
    ---------
    Result
        Status 0 with the response "delete_group:" followed by ``self.name``
    """
    tag = "delete_group:" + self.name
    return Result(0, tag)
def update_group(self, group_atts: dict):
    """
    Return a successful Result tagged with this method and this object's name

    Parameters
    ----------
    group_atts : dict
        Group attributes (not used)

    Returns
    ---------
    Result
        Status 0 with the response "update_group:" followed by ``self.name``
    """
    tag = "update_group:" + self.name
    return Result(0, tag)