def import_target(device_type, path, pacemaker_ha_operation, validate_importable=False):
    """Import the device at *path* when the device type supports the concept.

    A jbod scsi disk, for example, has no notion of import while zfs does.

    :param device_type: the type of device to import
    :param path: path of device to import
    :param pacemaker_ha_operation: True when this import is at the request of
        pacemaker; in HA operations the device may often not have been cleanly
        exported because the previously mounted node failed in operation.
    :param validate_importable: the intention is only to verify the device can
        be imported, not to leave it imported: the device is imported and then
        exported again, checking both steps for errors.
    :return: Value using simple return protocol (None or an error message)
    """
    blockdevice = BlockDevice(device_type, path)

    error = blockdevice.import_(False)

    if error and ('-f' in error) and pacemaker_ha_operation:
        # Force the import only when pacemaker drives the operation.
        error = blockdevice.import_(True)

    if error:
        console_log.error("Error importing pool: '%s'" % error)

    if (error is None) and (validate_importable is True):
        # Validation only: immediately export again, checking for errors.
        error = blockdevice.export()

        if error:
            console_log.error("Error exporting pool: '%s'" % error)

    return agent_ok_or_error(error)
def _configure_rsyslog(destination):
    """Rewrite /etc/rsyslog.conf, replacing any previous chroma-agent section.

    The chroma-agent section is delimited by "# added by chroma-agent" marker
    lines; everything between (and including) the markers is dropped and, when
    *destination* is non-empty, a fresh forwarding section is appended.  The
    rsyslog service is then reloaded (or restarted if the reload fails).

    :param destination: host to forward syslog to, or "" to remove forwarding
    :return: Value using simple return protocol
    """
    from tempfile import mkstemp

    # Create the replacement file in /etc so os.rename() below is an atomic
    # same-filesystem move.
    tmp_f, tmp_name = mkstemp(dir='/etc')

    with open('/etc/rsyslog.conf', 'r') as f:
        skip = False
        for line in f.readlines():
            if skip:
                # Drop lines until (and including) the closing marker.
                if line == "# added by chroma-agent\n":
                    skip = False
                continue
            if line == "# added by chroma-agent\n":
                skip = True
                continue
            os.write(tmp_f, line)

    if destination != "":
        os.write(tmp_f, "# added by chroma-agent\n"
                        "$PreserveFQDN on\n"
                        "*.* @@%s:%s;RSYSLOG_ForwardFormat\n"
                        "# added by chroma-agent\n" % (destination, SYSLOG_PORT))
    os.close(tmp_f)

    # 0o644 (was the Python-2-only literal 0644, which is a syntax error on
    # Python 3; 0o644 is accepted by Python 2.6+ and 3).
    os.chmod(tmp_name, 0o644)
    os.rename(tmp_name, "/etc/rsyslog.conf")

    # Signal the process to reload; "and" short-circuits so the restart only
    # runs if the reload returned an error value.
    error = rsyslog_service.reload() and rsyslog_service.restart()

    return agent_ok_or_error(error)
def unconfigure_corosync2(host_fqdn, mcast_port):
    """Unconfigure the corosync application.

    For corosync2 pcsd is not disabled; the host node is removed from the
    cluster and corosync is stopped from auto starting (the service should
    already be stopped in the state transition).  Note that pcs cluster
    commands handle editing and removal of the corosync.conf file.

    Return: Value using simple return protocol
    """
    error = corosync_service.disable()
    if error:
        return agent_error(error)

    # Detect whether we are the only node in the cluster; this must happen
    # before the next command removes the conf file.
    cluster_nodes = _nodes_in_cluster()

    remove_result = AgentShell.run(["pcs", "--force", "cluster", "node", "remove", host_fqdn])

    if remove_result.rc != 0:
        stderr = remove_result.stderr
        if "No such file or directory" in stderr:
            # Return successfully when the configuration file does not exist.
            console_log.warning(stderr)
        elif "Error: Unable to update any nodes" in stderr:
            # Expected when this is the last node in the cluster.
            if len(cluster_nodes) != 1:
                return agent_error(stderr)
        else:
            return agent_error(stderr)

    return agent_ok_or_error(
        firewall_control.remove_rule(PCS_TCP_PORT, "tcp", "pcs", persist=True) or
        firewall_control.remove_rule(mcast_port, "udp", "corosync", persist=True))
def _configure_pacemaker():
    '''
    Configure pacemaker if this node is the DC.

    Polls every 10 seconds until the global configuration is in place or
    PACEMAKER_CONFIGURE_TIMEOUT elapses.

    :return: agent_ok if no error else returns an agent_error
    '''
    pacemaker_config = PacemakerConfig()

    deadline = time.time() + PACEMAKER_CONFIGURE_TIMEOUT
    error = None

    while (pacemaker_config.configured is False) and (time.time() < deadline):
        if pacemaker_config.is_dc:
            daemon_log.info(
                'Configuring (global) pacemaker configuration because I am the DC'
            )
            error = _do_configure_pacemaker(pacemaker_config)

            if error:
                return agent_error(error)
        else:
            daemon_log.info(
                'Not configuring (global) pacemaker configuration because I am not the DC'
            )

        time.sleep(10)

    if pacemaker_config.configured is False:
        error = 'Failed to configure (global) pacemaker configuration dc=%s' % pacemaker_config.dc

    return agent_ok_or_error(error)
def unload_lnet():
    '''
    Unload the lnet modules from memory, including any modules that are
    dependent on the lnet module.  Lnet must be stopped before unload_lnet
    is called.
    '''
    error = _rmmod('lnet')
    return agent_ok_or_error(error)
def check_block_device(path, device_type):
    """Precursor to formatting a device: check for an existing filesystem.

    :param path: Path to a block device
    :param device_type: The type of device the path references
    :return The filesystem type of the filesystem on the device, or None if
            unoccupied, wrapped in the simple return protocol.
    """
    blockdevice = BlockDevice(device_type, path)
    return agent_ok_or_error(blockdevice.filesystem_info)
def unload_lnet():
    """
    Unload the lnet modules from memory, including any modules that are
    dependent on the lnet module.  Lnet must be stopped before unload_lnet
    is called.
    """
    result = AgentShell.run_canned_error_message(["lustre_rmmod"])
    return agent_ok_or_error(result)
def stop_lnet():
    '''
    Place lnet into the 'down' state.  Any modules that are dependent on
    lnet being in the 'up' state are unloaded first, with the exception of
    ksocklnd and ko2iblnd.
    '''
    console_log.info("Stopping LNet")

    error = (_rmmod_deps("lnet", excpt=["ksocklnd", "ko2iblnd"]) or
             AgentShell.run_canned_error_message(["lctl", "net", "down"]))
    return agent_ok_or_error(error)
def start_lnet():
    '''
    Place lnet into the 'up' state.
    '''
    console_log.info("Starting LNet")

    # "modprobe lustre" is a hack for HYD-1263 - Fix or work around LU-1279
    # (failure trying to mount); it should be removed when LU-1279 is fixed.
    error = (AgentShell.run_canned_error_message(["lctl", "net", "up"]) or
             AgentShell.run_canned_error_message(["modprobe", "lustre"]))
    return agent_ok_or_error(error)
def configure_ntp(ntp_server):
    """Change the ntp configuration file to use the server passed.

    :return: Value using simple return protocol
    """
    error = NTPConfig().add(ntp_server)

    if error:
        # NOTE(review): returned raw rather than via agent_error() - confirm
        # NTPConfig.add() already yields a simple-return-protocol value.
        return error

    return agent_ok_or_error(ntp_service.restart())
def unconfigure_fencing():
    """Remove fencing configuration, but only if we are the only node in the
    cluster.

    Pacemaker must be up to answer the cluster-size question; when it is not
    running this is simply skipped.

    :return: Value using simple return protocol
    """
    # Fix: previously returned a bare 0 on the skip paths, which breaks the
    # simple return protocol used everywhere else (cf. unconfigure_pacemaker).
    if not _pacemaker_running():
        # Pacemaker is not up, so just skip doing this.
        return agent_result_ok

    if _get_cluster_size() > 1:
        # Other nodes remain in the cluster; leave fencing configured.
        return agent_result_ok

    return agent_ok_or_error(_unconfigure_fencing())
def change_mcast_port(old_mcast_port, new_mcast_port):
    """Update corosync configuration with a new mcast_port on this managed
    server (not all the nodes in the cluster).

    Corosync will read the updated value from the configuration file, which
    it is polling for updates.

    Return: Value using simple return protocol
    """
    sed_command = ['sed',
                   '-i.bak',
                   's/mcastport:.*/mcastport: %s/g' % new_mcast_port,
                   COROSYNC_CONF_PATH]

    # Swap the firewall rules over, then edit the config file in place.
    error = (firewall_control.remove_rule(old_mcast_port, "udp", "corosync", persist=True) or
             firewall_control.add_rule(new_mcast_port, "udp", "corosync", persist=True) or
             AgentShell.run_canned_error_message(sed_command))

    return agent_ok_or_error(error)
def stop_lnet():
    """
    Place lnet into the 'down' state.  Any modules that are dependent on
    lnet being in the 'up' state are unloaded before lnet is unconfigured.
    """
    console_log.info("Stopping LNet")

    error = (AgentShell.run_canned_error_message(["lustre_rmmod", "ptlrpc"]) or
             AgentShell.run_canned_error_message(["lnetctl", "lnet", "unconfigure"]))
    return agent_ok_or_error(error)
def configure_corosync2_stage_2(ring0_name, ring1_name, new_node_fqdn, mcast_port, pcs_password, create_cluster):
    """Process configuration including peers and negotiated multicast port,
    no IP address information required.

    Note: "The pcs cluster setup command will automatically configure
    two_node: 1 in corosync.conf, so a two-node cluster will "just work". If
    you are using a different cluster shell, you will have to configure
    corosync.conf appropriately yourself."  Therefore no-quorum-policy does
    not have to be set when setting up cluster with pcs.

    :param ring0_name:
    :param ring1_name:
    :param new_node_fqdn:
    :param mcast_port:
    :param pcs_password:
    :param create_cluster:
    :return: Value using simple return protocol
    """
    ring0 = InterfaceInfo(CorosyncRingInterface(name=ring0_name,
                                                ringnumber=0,
                                                mcastport=mcast_port),
                          None, None)
    ring1 = InterfaceInfo(CorosyncRingInterface(name=ring1_name,
                                                ringnumber=1,
                                                mcastport=mcast_port),
                          None, None)

    config_params = {
        'token': '17000',
        'fail_recv_const': '10',
        'transport': 'udp',
        'rrpmode': 'passive',
        'addr0': ring0.corosync_iface.bindnetaddr,
        'addr1': ring1.corosync_iface.bindnetaddr,
        'mcast0': ring0.corosync_iface.mcastaddr,
        'mcast1': ring1.corosync_iface.mcastaddr,
        'mcastport0': ring0.corosync_iface.mcastport,
        'mcastport1': ring1.corosync_iface.mcastport
    }

    # authenticate nodes in cluster
    authenticate_nodes_in_cluster_command = ['pcs', 'cluster', 'auth', new_node_fqdn,
                                             '-u', PCS_USER, '-p', pcs_password]

    # Build the command for cluster setup which will result in corosync.conf
    # (rather than writing it from a template); note we don't start the
    # cluster here as services are managed independently.
    if create_cluster:
        cluster_setup_command = ['pcs', 'cluster', 'setup', '--name', PCS_CLUSTER_NAME,
                                 '--force', new_node_fqdn]
        for param in ['transport', 'rrpmode', 'addr0', 'mcast0', 'mcastport0',
                      'addr1', 'mcast1', 'mcastport1', 'token', 'fail_recv_const']:
            # pull this value from the dictionary using parameter keyword
            cluster_setup_command.extend(["--" + param, str(config_params[param])])
    else:
        cluster_setup_command = ['pcs', 'cluster', 'node', 'add', new_node_fqdn]

    return agent_ok_or_error(
        AgentShell.run_canned_error_message(authenticate_nodes_in_cluster_command) or
        AgentShell.run_canned_error_message(cluster_setup_command))
def unconfigure_pacemaker():
    """Stop and disable pacemaker; when this is the last node in the cluster
    the CIB is erased first.

    :return: Value using simple return protocol
    """
    # Pacemaker must be up to answer the cluster-size question; when it is
    # not running just skip doing this.
    if not _pacemaker_running():
        return agent_result_ok

    if _get_cluster_size() < 2:
        # Last node: nuke the CIB.
        cibadmin(["-f", "-E"])

    return agent_ok_or_error(pacemaker_service.stop() or
                             pacemaker_service.disable())
def configure_corosync2_stage_1(mcast_port, pcs_password):
    """First stage of corosync2 configuration on this node.

    Sets the password of user "hacluster" (created on install of the "pcs"
    package), opens the firewall for corosync and pcs, then starts pcsd and
    enables the services.

    WARNING: the password is passed to passwd in clear text.

    :return: Value using simple return protocol
    """
    set_password_command = ['bash', '-c',
                            'echo %s | passwd --stdin %s' % (pcs_password, PCS_USER)]

    error = (AgentShell.run_canned_error_message(set_password_command) or
             firewall_control.add_rule(mcast_port, "udp", "corosync", persist=True) or
             firewall_control.add_rule(PCS_TCP_PORT, "tcp", "pcs", persist=True) or
             pcsd_service.start() or
             corosync_service.enable() or
             pcsd_service.enable())

    return agent_ok_or_error(error)
def configure_ntp(ntp_server):
    """Change the ntp configuration file to use the server passed, handing
    time-keeping over from chrony to ntpd.

    :return: Value using simple return protocol
    """
    error = NTPConfig().add(ntp_server)

    if error:
        # NOTE(review): returned raw rather than via agent_error() - confirm
        # NTPConfig.add() already yields a simple-return-protocol value.
        return error

    # Swap services: stop and disable chrony, enable and restart ntpd.
    chrony_service.stop(validate_time=0.5)
    chrony_service.disable()
    ntp_service.enable()

    return agent_ok_or_error(ntp_service.restart())
def configure_corosync(ring0_name, ring1_name, old_mcast_port, new_mcast_port):
    """Process configuration including negotiated multicast port, no IP
    address information required.

    :param ring0_name:
    :param ring1_name:
    :param old_mcast_port: None if we are configuring corosync for the
        first time, present if changing mcast port
    :param new_mcast_port: desired corosync multicast port as configured by
        user
    :return: Value using simple return protocol
    """
    ring_interfaces = [
        InterfaceInfo(
            CorosyncRingInterface(name=ring0_name, ringnumber=0, mcastport=new_mcast_port),
            None,
            None,
        ),
        InterfaceInfo(
            CorosyncRingInterface(name=ring1_name, ringnumber=1, mcastport=new_mcast_port),
            None,
            None,
        ),
    ]

    config = render_config([info.corosync_iface for info in ring_interfaces])
    write_config_to_file("/etc/corosync/corosync.conf", config)

    # When the mcast port changed, drop the firewall rule for the old port.
    if old_mcast_port is not None:
        error = firewall_control.remove_rule(old_mcast_port, "udp", "corosync", persist=True)
        if error:
            return agent_error(error)

    return agent_ok_or_error(
        firewall_control.add_rule(new_mcast_port, "udp", "corosync", persist=True) or
        corosync_service.enable())
def export_target(device_type, path):
    """Export the device at *path* when the device type supports the concept.

    A jbod scsi disk, for example, has no notion of export while zfs does.

    :param path: path of device to export
    :param device_type: the type of device to export
    :return: Value using simple return protocol (None or an error message)
    """
    error = BlockDevice(device_type, path).export()

    if error:
        console_log.error("Error exporting pool: '%s'" % error)

    return agent_ok_or_error(error)
def configure_corosync2_stage_1(mcast_port, pcs_password, fqdn=None):
    """First stage of corosync2 configuration on this node.

    Sets the password of user "hacluster" (created on install of the "pcs"
    package), optionally sets the system hostname, opens the firewall for
    corosync and pcs, then starts pcsd and enables the services.

    WARNING: the password is passed to passwd in clear text.

    :param fqdn: when not None, set the system hostname to this value first
    :return: Value using simple return protocol
    """
    set_password_command = [
        "bash",
        "-c",
        "echo %s | passwd --stdin %s" % (pcs_password, PCS_USER),
    ]

    if fqdn is not None:
        error = AgentShell.run_canned_error_message(["hostnamectl", "set-hostname", fqdn])
        if error:
            return agent_error(error)

    error = (AgentShell.run_canned_error_message(set_password_command) or
             firewall_control.add_rule(mcast_port, "udp", "corosync", persist=True) or
             firewall_control.add_rule(PCS_TCP_PORT, "tcp", "pcs", persist=True) or
             pcsd_service.start() or
             corosync_service.enable() or
             pcsd_service.enable())

    return agent_ok_or_error(error)
def import_target(device_type, path, pacemaker_ha_operation):
    """Import the device at *path* when the device type supports the concept.

    A jbod scsi disk, for example, has no notion of import while zfs does.

    :param device_type: the type of device to import
    :param path: path of device to import
    :param pacemaker_ha_operation: True when this import is at the request of
        pacemaker; in HA operations the device may often not have been cleanly
        exported because the previously mounted node failed in operation.
    :return: Value using simple return protocol (None or an error message)
    """
    blockdevice = BlockDevice(device_type, path)

    error = blockdevice.import_(False)

    if error and ('-f' in error) and pacemaker_ha_operation:
        # Force the import only when pacemaker drives the operation.
        error = blockdevice.import_(True)

    if error:
        console_log.error("Error importing pool: '%s'" % error)

    return agent_ok_or_error(error)
def enable_pacemaker():
    """Enable the pacemaker service.

    :return: Value using simple return protocol
    """
    error = pacemaker_service.enable()
    return agent_ok_or_error(error)
def stop_pacemaker():
    """Stop the pacemaker service.

    :return: Value using simple return protocol
    """
    error = pacemaker_service.stop()
    return agent_ok_or_error(error)
def stop_corosync2():
    """Stop the corosync service.

    :return: Value using simple return protocol
    """
    error = corosync_service.stop()
    return agent_ok_or_error(error)
def open_firewall(port, address, proto, description, persist):
    """Add a firewall rule allowing traffic on *port*/*proto*.

    :return: Value using simple return protocol
    """
    controller = FirewallControl.create()
    error = controller.add_rule(port, proto, description, persist, address)
    return agent_ok_or_error(error)
def start_corosync2():
    """Enable and then start the corosync service.

    :return: Value using simple return protocol
    """
    error = corosync_service.enable() or corosync_service.start()
    return agent_ok_or_error(error)
def _start_service():
    """Start the agent service.

    :return: Value using simple return protocol
    """
    error = agent_service.start()
    return agent_ok_or_error(error)
def _disable_service():
    """Disable the agent service.

    :return: Value using simple return protocol
    """
    error = agent_service.disable()
    return agent_ok_or_error(error)
def _enable_service():
    """Enable the agent service.

    :return: Value using simple return protocol
    """
    error = agent_service.enable()
    return agent_ok_or_error(error)
def _stop_service():
    """Stop the agent service.

    :return: Value using simple return protocol
    """
    error = agent_service.stop()
    return agent_ok_or_error(error)