def dlm_fence_daemon(node_id):
    """Run the fencing daemon loop for cluster node *node_id*.

    The function opens /dev/watchdog, installs a SIGUSR1 handler, calls
    demonize() and then loops forever.  Once per second it:

    * takes the DLMREF_LOCK file lock and opens the DLMREF shelf;
    * for every key K in the shelf, reads this node's message slot on
      /dev/K/sbd (position BLK_SIZE * 2 * n);
    * on MSG_FENCE, sets the watchdog timeout to 1 second and writes
      MSG_FENCE_ACK into the node's ack slot (BLK_SIZE * (2 * n + 1));
    * releases the shelf and the lock, then writes to the watchdog.

    Args:
        node_id: node identifier; converted with int().

    The function does not return normally.  On SIGUSR1 the handler writes
    "V" to the watchdog, closes it and calls exit(0).
    """
    # WDIOC_SETTIMEOUT, i.e. _IOWR('W', 6, int) in <linux/watchdog.h>.
    # Numerically identical to the decimal literal 3221509894.
    wdioc_settimeout = 0xC0045706

    n = int(node_id)
    log.debug("Starting dlm_fence_daemon on node_id=%d" % n)
    wd = os.open("/dev/watchdog", os.O_WRONLY)

    def dlm_fence_daemon_signal_handler(sig, frame):
        log.debug("dlm_fence_daemon_signal_handler")
        # "V" followed by close is the Linux watchdog "magic close"
        # sequence, which asks the driver to disarm the timer (subject to
        # driver support / nowayout).
        os.write(wd, "V")
        os.close(wd)
        log.debug("dlm_fence_daemon: exiting cleanly")
        exit(0)

    signal.signal(signal.SIGUSR1, dlm_fence_daemon_signal_handler)
    demonize()

    while True:
        f = util.lock_file("SSSS", DLMREF_LOCK, "r+")
        try:
            d = shelve.open(DLMREF)
            try:
                for key in d.keys():
                    bd = "/dev/" + key + "/sbd"
                    ret = block_read(bd, BLK_SIZE * 2 * n)
                    if ret == MSG_FENCE:
                        log.debug("dlm_fence_daemon: MSG_FENCE")
                        log.debug("dlm_fence_daemon: Setting WD timeout to 1 second")
                        fcntl.ioctl(wd, wdioc_settimeout, struct.pack("i", 1))
                        log.debug("dlm_fence_daemon: writing MSG_FENCE_ACK")
                        block_write(bd, BLK_SIZE * ((2 * n) + 1), MSG_FENCE_ACK)
                        log.debug("dlm_fence_daemon: MSG_FENCE_ACK sent")
                        # host will be fenced in 1 second
            finally:
                # Always close the shelf, even if a device read/write or
                # the ioctl raised.
                d.close()
        finally:
            # Never leave the refcount lock held: attach/detach and
            # dlm_fence_node take the same lock.
            util.unlock_file("SSSS", f)

        # NOTE(review): the watchdog is still written to after a fence has
        # been acknowledged, so whether the 1-second timeout expires depends
        # on the loop taking longer than that -- confirm this is intended.
        os.write(wd, "w")
        time.sleep(1)
def detach(self, dbg, sr):
    """Detach the SR *sr* from this host.

    Steps, in order:

    1. read 'uri' and 'unique_id' from the SR metadata;
    2. unmount the path component of *sr*;
    3. under the "dlmref.lock" file lock, remove this SR's entry from the
       "dlmref" shelf; if the shelf is then empty, stop the dlm service
       and the fencing daemon;
    4. deactivate the "<unique_id>/gfs2" and "<unique_id>/sbd" LVs;
    5. unplug the device for the uri.

    Args:
        dbg: debug/context string passed through to helpers.
        sr: SR URL; its path component is the mount point.

    Raises:
        KeyError: if the shelf has no entry for this SR's unique_id.
        Anything raised by the helpers is propagated; the shelf and the
        lock are released first.
    """
    import shelve

    # Get the iSCSI uri from the SR metadata
    uri = getFromSRMetadata(dbg, sr, 'uri')
    # Get the unique_id from the SR metadata
    unique_id = getFromSRMetadata(dbg, sr, 'unique_id')

    # GC is not stopped here; the call below is currently disabled.
    # VHDCoalesce.stop_gc(dbg, "gfs2", sr)

    # Unmount the FS
    mnt_path = urlparse.urlparse(sr).path
    umount(dbg, mnt_path)

    dlmref = os.path.join(DLM_REFDIR, "dlmref")
    f = util.lock_file(dbg, dlmref + ".lock", "r+")
    try:
        d = shelve.open(dlmref)
        try:
            del d[str(unique_id)]
            current = len(d)
        finally:
            d.close()

        if current == 0:
            # Last SR on this host: stop dlm, then the fencing daemon.
            call(dbg, ["/usr/bin/systemctl", "stop", "dlm"])
            node_id = get_node_id(dbg)
            log.debug("Calling dlm_fence_daemon_stop: node_id=%d" % node_id)
            fence_tool.dlm_fence_daemon_stop(node_id)
    finally:
        # Release the refcount lock even when the update or the service
        # stop fails, so later attach/detach calls are not blocked.
        util.unlock_file(dbg, f)

    # deactivate gfs2 LV
    call(dbg, ["/usr/sbin/lvchange", "-an", unique_id + "/gfs2"])

    # Fixme: kill fencing daemon

    # deactivate sbd LV
    call(dbg, ["/usr/sbin/lvchange", "-an", unique_id + "/sbd"])

    # Unplug device if need be
    unplug_device(dbg, uri)
def ls(self, dbg, sr):
    """Rescan the SR's storage and grow the gfs2 LV/filesystem if possible.

    Visible behaviour: rescans iSCSI nodes, runs pvresize on the SR's PV,
    and, when the VG reports free space above VG_FREE_SPACE_THRESHOLD,
    extends the gfs2 LV, calls the "gfs2setup"/"refreshDM" plugin on every
    other host, and runs gfs2_grow.

    NOTE(review): the definition appears truncated in this view -- the
    outer ``try`` has no visible handler and no return statement is
    visible.  Only the visible part is documented here.
    """
    pv_name = getPVName(dbg,sr)
    vg_name = getVGName(dbg,sr)
    lv_name = "/dev/" + vg_name +"/gfs2"
    try:
        # refresh iscsi connection to reflect LUN's new size
        call(dbg, ["/usr/sbin/iscsiadm", "-m", "node", "-R"])
        # Whether or not the LUN was resized, go ahead and resize the PV;
        # if the LUN was resized the PV size will be updated.
        call(dbg, ["/usr/sbin/pvresize" , pv_name, "--config", "global{metadata_read_only=0}"])
        # If the PV was expanded, this shows up as free space in the
        # associated volume group; only then do we need to expand the
        # gfs2 LV.
        stats = vg_stats(dbg,vg_name)
        if stats['freespace'] > VG_FREE_SPACE_THRESHOLD:
            log.debug("Free space (%s) detected in VG, expanding gfs2 LV." %str(stats['freespace']))
            opq = urlparse.urlparse(sr).path
            try:
                # Lock file "gl" under the SR path is held for the resize.
                gl = os.path.join(opq, "gl")
                f = util.lock_file(dbg, gl, "w+")
                # extend lv
                call(dbg, ["lvextend", "-l+100%FREE", lv_name, "--config", "global{metadata_read_only=0}"])
                # inform the other hosts about the LUN resize
                inventory = xcp.environ.readInventory()
                session = XenAPI.xapi_local()
                session.xenapi.login_with_password("root", "")
                this_host = session.xenapi.host.get_by_uuid(
                    inventory.get("INSTALLATION_UUID"))
                for host in session.xenapi.host.get_all():
                    if host != this_host:
                        log.debug("%s: setup host %s" % (dbg, session.xenapi.host.get_name_label(host)))
                        # 'pv_dev' is the third '/'-separated component of
                        # pv_name.
                        session.xenapi.host.call_plugin(
                            host, "gfs2setup", "refreshDM", {'lv_name': lv_name, 'pv_dev': pv_name.split('/')[2]})
                # grow gfs2
                call(dbg, ["gfs2_grow", mountpoint_root + "dev/" + vg_name +"/gfs2"])
            except Exception, e:
                # Re-raises the caught exception unchanged (Python 2 syntax).
                raise e
            finally:
                # NOTE(review): if util.lock_file() itself raised, `f` is
                # unbound here and this test raises NameError -- confirm.
                if f:
                    util.unlock_file(dbg,f)
    # NOTE(review): the except/finally clause that must close the outer
    # try is not visible in this chunk.
def dlm_fence_node(node_id):
    """Ask node *node_id* to fence itself and wait for its acknowledgement.

    Phase 1: under the DLMREF_LOCK file lock, write MSG_FENCE into the
    node's message slot (BLK_SIZE * 2 * n) on /dev/K/sbd for every key K in
    the DLMREF shelf.

    Phase 2: poll the node's ack slot (BLK_SIZE * (2 * n + 1)) on the same
    devices, about once per second.  When MSG_FENCE_ACK is seen, sleep 2
    seconds and call exit(0).  If no ack is seen before the loop ends, log
    the timeout and return None.

    The shelf and the lock are released on every path, including errors,
    and the lock is not held during the 2-second sleep.

    Args:
        node_id: node identifier; converted with int().
    """
    n = int(node_id)
    log.debug("dlm_fence_node node_id=%d" % n)

    # Phase 1: post the fence request on every registered sbd device.
    f = util.lock_file("dlm_fence_node", DLMREF_LOCK, "r+")
    try:
        d = shelve.open(DLMREF)
        try:
            for key in d.keys():
                bd = "/dev/" + key + "/sbd"
                block_write(bd, BLK_SIZE * 2 * n, MSG_FENCE)
        finally:
            d.close()
    finally:
        util.unlock_file("dlm_fence_node", f)

    # Phase 2: wait for an ACK or assume the node has been fenced.
    # NOTE(review): range(1, WD_TIMEOUT + 10) polls WD_TIMEOUT + 9 times,
    # one fewer than the "WD_TIMEOUT + 10 seconds" the original comment
    # states; left unchanged to keep the existing timing -- confirm.
    for _ in range(1, WD_TIMEOUT + 10):
        acked = False
        f = util.lock_file("dlm_fence_node", DLMREF_LOCK, "r+")
        try:
            d = shelve.open(DLMREF)
            try:
                for key in d.keys():
                    bd = "/dev/" + key + "/sbd"
                    if block_read(bd, BLK_SIZE * ((2 * n) + 1)) == MSG_FENCE_ACK:
                        acked = True
                        break
            finally:
                d.close()
        finally:
            util.unlock_file("dlm_fence_node", f)

        if acked:
            log.debug("dlm_fence_node got MSG_FENCE_ACK for node_id=%d" % n)
            # Presumably gives the acknowledged node's 1-second watchdog
            # time to fire before success is reported -- confirm.
            time.sleep(2)
            exit(0)

        time.sleep(1)

    log.debug("dlm_fence_node ACKING FENCE after TIMEOUT for node_id=%d" % n)
def attach(self, dbg, uri):
    """Attach the SR at *uri* on this host and return its "file://" URL.

    Visible steps: plug the device, return early if the gfs2 mount point
    is already mounted, check whether corosync is active and trigger the
    "gfs2setup" plugin across the pool if not, load kernel modules,
    activate the sbd LV, clear this node's fencing region, register the SR
    in the "dlmref" shelf (starting the fencing daemon and dlm for the
    first SR), activate the gfs2 LV and mount it.

    NOTE(review): the source layout was flattened; the extent of the
    ``else:`` branch below is a best guess and must be confirmed against
    the original file.
    """
    import shelve
    log.debug("%s: SR.attach: uri=%s" % (dbg, uri))
    # Zone in the LUN on this host
    dev_path = plug_device(dbg, uri)
    unique_id = get_unique_id_from_dev_path(dev_path)
    gfs2_dev_path = "/dev/" + unique_id + "/gfs2"
    tmp_mnt_check = getSRMountPath(dbg, gfs2_dev_path, False)
    # Short-circuit when the SR is already mounted on this host.
    try:
        if os.path.ismount(tmp_mnt_check):
            log.debug("%s: SR.attach: uri=%s ALREADY ATTACHED" % (dbg, uri))
            return "file://" + tmp_mnt_check
    except:
        log.debug("%s: SR.attach: uri=%s NOT ATTACHED YET" % (dbg, uri))
    # Query corosync state; on any exception from call(), retry after one
    # second, up to 19 attempts.  `output` stays "" if every attempt fails.
    output = ""
    for attempt in range(1,20):
        try:
            output = call(dbg, ["/usr/bin/systemctl", "is-active", "corosync"]).rstrip()
            break
        except:
            time.sleep(1)
    if output != "active":
        # Notify other pool members we have arrived
        inventory = xcp.environ.readInventory()
        session = XenAPI.xapi_local()
        session.xenapi.login_with_password("root", "")
        this_host = session.xenapi.host.get_by_uuid(
            inventory.get("INSTALLATION_UUID"))
        # FIXME: Do not notify offline hosts
        # FIXME: See ffs.call_plugin_in_pool()
        # First pass: every host (including this one) gets gfs2UpdateConf.
        for host in session.xenapi.host.get_all():
            log.debug("%s: refresh host %s config file" % (dbg, session.xenapi.host.get_name_label(host)))
            session.xenapi.host.call_plugin(
                host, "gfs2setup", "gfs2UpdateConf", {})
        # Second pass: every other host gets gfs2Reload.
        for host in session.xenapi.host.get_all():
            if host != this_host:
                log.debug("%s: setup host %s" % (dbg, session.xenapi.host.get_name_label(host)))
                session.xenapi.host.call_plugin(
                    host, "gfs2setup", "gfs2Reload", {})
        # this_host will reload last
        log.debug("%s: refresh host %s" % (dbg, session.xenapi.host.get_name_label(this_host)))
        session.xenapi.host.call_plugin(
            this_host, "gfs2setup", "gfs2Reload", {})
    else:
        # NOTE(review): where this branch ends is not recoverable from the
        # flattened source; it is assumed to cover only the two config-file
        # writes below -- confirm.
        sysconfigdlm = open("/etc/sysconfig/dlm", "w")
        sysconfigdlm.write("DLM_CONTROLD_OPTS=\"--enable_fencing=1 --enable_quorum_fencing=1 -K\"\n")
        sysconfigdlm.close()
        # NOTE(review): the literal ends with a space before the closing
        # quotes; presumably a newline in the original layout -- confirm.
        conf = """fence_all /usr/libexec/xapi-storage-script/volume/org.xen.xapi.storage.gfs2/fence_tool.py """
        if not os.path.exists("/etc/dlm"):
            os.mkdir("/etc/dlm")
        dlmconf = open("/etc/dlm/dlm.conf", "w")
        dlmconf.write(conf)
        dlmconf.close()
    call(dbg, ["/usr/sbin/modprobe", "dlm"])
    call(dbg, ["/usr/sbin/modprobe", "gfs2"])
    call(dbg, ["/usr/sbin/modprobe", "xen_wdt"])
    # activate sbd LV
    cmd = ["/usr/sbin/lvchange", "-ay", unique_id + "/sbd"]
    call(dbg, cmd)
    # initialise region on sbd for fencing daemon
    node_id = get_node_id(dbg)
    fence_tool.dlm_fence_clear_by_id(node_id, unique_id)
    # Lock refcount file before starting dlm
    if not os.path.exists(DLM_REFDIR):
        os.mkdir(DLM_REFDIR)
    dlmref = os.path.join(DLM_REFDIR, "dlmref")
    f = util.lock_file(dbg, dlmref + ".lock", "a+")
    d = shelve.open(dlmref)
    # `previous` is the number of entries in the shelf before this SR is
    # added.
    klist = d.keys()
    previous = len(klist)
    d[str(unique_id)] = 0
    d.close()
    log.debug("previous_scsi_ids=%d" % previous)
    if previous == 0:
        # Start fencing daemon
        log.debug("Calling dlm_fence_daemon_start: node_id=%d" % node_id)
        fence_tool.dlm_fence_daemon_start(node_id)
        # start dlm
        cmd = ["/usr/bin/systemctl", "start", "dlm"]
        call(dbg, cmd)
    # NOTE(review): the lock is not released if any call between
    # lock_file() and here raises.
    util.unlock_file(dbg, f)
    # activate gfs2 LV
    cmd = ["/usr/sbin/lvchange", "-ay", unique_id + "/gfs2"]
    call(dbg, cmd)
    # Mount the gfs2 filesystem
    mnt_path = mount(dbg, gfs2_dev_path)
    log.debug("%s: mounted on %s" % (dbg, mnt_path))
    sr = "file://" + mnt_path
    # Start GC for this host
    # VHDCoalesce.start_gc(dbg, "gfs2", sr)
    return sr