def __init__(self, patching, logger, hutil):
    """Wire up collaborators and reset the freeze bookkeeping state.

    Args:
        patching: Passed through to ``Mounts`` and ``ResourceDiskUtil``.
        logger: Object exposing ``log(msg, flag, level)``; used for all
            diagnostics emitted here.
        hutil: Passed to ``FreezeHandler`` together with the logger.

    If ``Mounts`` cannot be built, the failure is logged and
    ``self.mounts`` is left as ``None`` instead of raising.
    """
    self.patching = patching
    self.logger = logger
    self.hutil = hutil

    # Mount discovery is best-effort: a failure is recorded, not propagated.
    try:
        self.mounts = Mounts(patching=self.patching, logger=self.logger)
    except Exception as exc:
        warning_text = 'Failed to retrieve mount points, Exception %s, stack trace: %s' % (
            str(exc), traceback.format_exc())
        self.logger.log(warning_text, True, 'Warning')
        self.logger.log(str(exc), True)
        self.mounts = None

    # Bookkeeping containers and flags.
    self.frozen_items = set()
    self.unfrozen_items = set()
    self.freeze_handler = FreezeHandler(self.logger, self.hutil)
    self.mount_open_failed = False

    # The resource disk mount point is looked up once and cached.
    disk_helper = ResourceDiskUtil(patching=patching, logger=logger)
    self.resource_disk_mount_point = disk_helper.get_resource_disk_mount_point()

    # Lock-acquisition state.
    self.skip_freeze = True
    self.isAquireLockSucceeded = True
    self.getLockRetry = 0
    self.maxGetLockRetry = 5
def __init__(self, patching, logger, hutil):
    """Set up collaborators, pick the safefreeze binary and reset state.

    Args:
        patching: Passed through to ``Mounts`` and ``ResourceDiskUtil``.
        logger: Object exposing ``log(msg, flag, level)``; used for all
            diagnostics emitted here.
        hutil: Passed to ``FreezeHandler`` together with the logger.

    The machine architecture is read from ``platform.machine()`` and
    ``os.uname()``; when either value starts with ``aarch64`` or ``arm64``
    the ARM64 binary path is selected, otherwise the x64 path is used.
    A failure while detecting the architecture is logged and the x64 path
    is kept. If ``Mounts`` cannot be built, the failure is logged and
    ``self.mounts`` is left as ``None`` instead of raising.
    """
    self.patching = patching
    self.logger = logger
    self.hutil = hutil

    arm64_prefixes = ("aarch64", "arm64")
    self.isArm64Machine = False
    try:
        platformMachine = platform.machine()
        architectureFromUname = os.uname()[-1]
        self.logger.log("platformMachine : " + str(platformMachine) +
                        " architectureFromUname : " + str(architectureFromUname))
        if ((platformMachine is not None
             and platformMachine.startswith(arm64_prefixes))
                or (architectureFromUname is not None
                    and architectureFromUname.startswith(arm64_prefixes))):
            self.isArm64Machine = True
    except Exception as e:
        errorMsg = "Unable to fetch machine processor architecture, error: %s, stack trace: %s" % (
            str(e), traceback.format_exc())
        # The level is the third argument; the second is the boolean flag
        # used by every other logger call in this file.
        self.logger.log(errorMsg, True, 'Error')

    if self.isArm64Machine:
        self.logger.log("isArm64Machine : " + str(self.isArm64Machine) +
                        " Using ARM64 safefreeze binary")
        self.safeFreezeFolderPath = "safefreezeArm64/bin/safefreeze"
    else:
        self.logger.log("isArm64Machine : " + str(self.isArm64Machine) +
                        " Using x64 safefreeze binary")
        self.safeFreezeFolderPath = "safefreeze/bin/safefreeze"

    # Mount discovery is best-effort: a failure is recorded, not propagated.
    try:
        self.mounts = Mounts(patching=self.patching, logger=self.logger)
    except Exception as e:
        errMsg = 'Failed to retrieve mount points, Exception %s, stack trace: %s' % (
            str(e), traceback.format_exc())
        self.logger.log(errMsg, True, 'Warning')
        self.logger.log(str(e), True)
        self.mounts = None

    self.frozen_items = set()
    self.unfrozen_items = set()
    self.freeze_handler = FreezeHandler(self.logger, self.hutil)
    self.mount_open_failed = False

    # The resource disk mount point is looked up once and cached.
    resource_disk = ResourceDiskUtil(patching=patching, logger=logger)
    self.resource_disk_mount_point = resource_disk.get_resource_disk_mount_point()

    # Lock-acquisition state.
    self.skip_freeze = True
    self.isAquireLockSucceeded = True
    self.getLockRetry = 0
    self.maxGetLockRetry = 5
    self.safeFreezelockFile = None
def get_total_used_size(self):
    """Sum the used space reported by ``df -k`` for devices that count.

    Each ``df`` row is classified (network share, temporary disk, RAM disk,
    loop device, gluster snapshot, or "everything else") and only the last
    category is added to the total. When ``self.isOnlyOSDiskBackupEnabled``
    is true, only the row mounted on ``/`` is added. Per-category totals are
    reported through ``add_to_telemetery_data`` and the logger.

    Returns:
        tuple: ``(total_used_bytes, size_calc_failed)``. On any exception,
        on a ``df`` timeout, or when the ``df`` output cannot be parsed,
        the result is ``(0, True)``.
    """
    try:
        size_calc_failed = False

        df = subprocess.Popen(["df", "-k"], stdout=subprocess.PIPE)
        # Sample output of the df command (this sample carries a "Type"
        # column; the parser below expects the six columns of plain `df -k`:
        # device, size, used, available, percent, mountpoint):
        #
        # Filesystem Type 1K-blocks Used Avail Use% Mounted on
        # /dev/sda2 xfs 52155392 3487652 48667740 7% /
        # devtmpfs devtmpfs 7170976 0 7170976 0% /dev
        # tmpfs tmpfs 7180624 0 7180624 0% /dev/shm
        # tmpfs tmpfs 7180624 760496 6420128 11% /run
        # tmpfs tmpfs 7180624 0 7180624 0% /sys/fs/cgroup
        # /dev/sda1 ext4 245679 151545 76931 67% /boot
        # /dev/sdb1 ext4 28767204 2142240 25140628 8% /mnt/resource
        # /dev/mapper/mygroup-thinv1 xfs 1041644 33520 1008124 4% /bricks/brick1
        # /dev/mapper/mygroup-85197c258a54493da7880206251f5e37_0 xfs 1041644 33520 1008124 4% /run/gluster/snaps/85197c258a54493da7880206251f5e37/brick2
        # /dev/mapper/mygroup2-thinv2 xfs 15717376 5276944 10440432 34% /tmp/test
        # /dev/mapper/mygroup2-63a858543baf4e40a3480a38a2f232a0_0 xfs 15717376 5276944 10440432 34% /run/gluster/snaps/63a858543baf4e40a3480a38a2f232a0/brick2
        # tmpfs tmpfs 1436128 0 1436128 0% /run/user/1000
        # //Centos72test/cifs_test cifs 52155392 4884620 47270772 10% /mnt/cifs_test2

        # Poll once per second, for at most 300 seconds, until df exits.
        process_wait_time = 300
        while process_wait_time > 0 and df.poll() is None:
            time.sleep(1)
            process_wait_time -= 1
        self.logger.log(
            "df command executed for process wait time value" +
            str(process_wait_time), True)

        if df.poll() is None:
            # df never finished: report the failure explicitly instead of
            # continuing with an empty output.
            self.logger.log(
                "df command did not complete within the wait time, "
                "size calculation failed", True)
            return 0, True

        self.logger.log("df return code" + str(df.returncode), True)
        try:
            output = df.stdout.read()
        finally:
            df.stdout.close()

        if sys.version_info > (3, ):
            output = str(output, encoding='utf-8', errors="backslashreplace")
        else:
            output = str(output)
        output = output.strip().split("\n")

        disk_loop_devices_file_systems = self.get_loop_devices()
        self.logger.log("outside loop device", True)

        # All running totals are in KB, as printed by `df -k`.
        total_used = 0
        total_used_network_shares = 0
        total_used_gluster = 0
        total_used_loop_device = 0
        total_used_temporary_disks = 0
        total_used_ram_disks = 0
        total_used_unknown_fs = 0
        actual_temp_disk_used = 0
        total_sd_size = 0
        network_fs_types = []
        unknown_fs_types = []

        if len(self.file_systems_info) == 0:
            # NOTE(review): `disk_util` is not defined inside this function;
            # presumably a module-level object - confirm.
            self.file_systems_info = disk_util.get_mount_file_systems()

        output_length = len(output)
        index = 1  # row 0 is the df header
        self.resource_disk = ResourceDiskUtil(patching=self.patching,
                                              logger=self.logger)
        resource_disk_device = self.resource_disk.get_resource_disk_mount_point(0)
        resource_disk_device = "/dev/{0}".format(resource_disk_device)
        # new logic: calculate the disk size for billing
        device_list = self.device_list_for_billing()

        while index < output_length:
            if len(output[index].split()) < 6:
                # A row was wrapped onto two lines: merge it with the next.
                index = index + 1
                if (index < output_length and
                        len(output[index - 1].split()) +
                        len(output[index].split()) == 6):
                    # Join with a space so the last field of the first half
                    # can never fuse with the first field of the second.
                    output[index] = output[index - 1] + " " + output[index]
                else:
                    self.logger.log(
                        "Output of df command is not in desired format", True)
                    total_used = 0
                    size_calc_failed = True
                    break

            device, size, used, available, percent, mountpoint = output[index].split()

            # Look up the filesystem type recorded for this device/mountpoint.
            fstype = ''
            for file_system_info in self.file_systems_info:
                if device == file_system_info[0] and mountpoint == file_system_info[2]:
                    fstype = file_system_info[1]
            self.logger.log(
                "Device name : {0} fstype : {1} size : {2} used space in KB : {3} available space : {4} mountpoint : {5}"
                .format(device, fstype, size, used, available, mountpoint), True)

            fstype_lower = fstype.lower()
            isNetworkFs = any(nonPhysicaFsType in fstype_lower
                              for nonPhysicaFsType in self.non_physical_file_systems)
            isKnownFs = any(knownFs in fstype_lower for knownFs in self.known_fs)
            is_unknown_fs = not (isKnownFs or fstype == '' or fstype is None)

            if device == resource_disk_device and self.isOnlyOSDiskBackupEnabled == False:
                # adding log to check difference in billing of temp disk
                self.logger.log(
                    "Actual temporary disk, Device name : {0} used space in KB : {1} fstype : {2}"
                    .format(device, used, fstype), True)
                actual_temp_disk_used = int(used)

            if device in device_list and device != resource_disk_device:
                # calculate total sd* size, just skip temp disk
                self.logger.log(
                    "Adding sd* partition, Device name : {0} used space in KB : {1} fstype : {2}"
                    .format(device, used, fstype), True)
                total_sd_size = total_sd_size + int(used)

            if is_unknown_fs:
                unknown_fs_types.append(fstype)

            if isNetworkFs:
                if fstype not in network_fs_types:
                    network_fs_types.append(fstype)
                self.logger.log(
                    "Not Adding network-drive, Device name : {0} used space in KB : {1} fstype : {2}"
                    .format(device, used, fstype), True)
                total_used_network_shares = total_used_network_shares + int(used)
            elif device == "/dev/sdb1" and self.isOnlyOSDiskBackupEnabled == False:
                # <todo> in some cases root is mounted on /dev/sdb1
                self.logger.log(
                    "Not Adding temporary disk, Device name : {0} used space in KB : {1} fstype : {2}"
                    .format(device, used, fstype), True)
                total_used_temporary_disks = total_used_temporary_disks + int(used)
            elif ("tmpfs" in fstype_lower or "devtmpfs" in fstype_lower
                  or "ramdiskfs" in fstype_lower or "rootfs" in fstype_lower):
                self.logger.log(
                    "Not Adding RAM disks, Device name : {0} used space in KB : {1} fstype : {2}"
                    .format(device, used, fstype), True)
                total_used_ram_disks = total_used_ram_disks + int(used)
            elif 'loop' in device and device not in disk_loop_devices_file_systems:
                self.logger.log(
                    "Not Adding Loop Device , Device name : {0} used space in KB : {1} fstype : {2}"
                    .format(device, used, fstype), True)
                total_used_loop_device = total_used_loop_device + int(used)
            elif mountpoint.startswith('/run/gluster/snaps/'):
                self.logger.log(
                    "Not Adding Gluster Device , Device name : {0} used space in KB : {1} mount point : {2}"
                    .format(device, used, mountpoint), True)
                total_used_gluster = total_used_gluster + int(used)
            elif device.startswith('\\\\') or device.startswith('//'):
                self.logger.log(
                    "Not Adding network-drive as it starts with slahes, Device name : {0} used space in KB : {1} fstype : {2}"
                    .format(device, used, fstype), True)
                total_used_network_shares = total_used_network_shares + int(used)
            else:
                if self.isOnlyOSDiskBackupEnabled == True:
                    if mountpoint == '/':
                        total_used = total_used + int(used)
                        self.logger.log(
                            "Adding only root device to size calculation. Device name : {0} used space in KB : {1} mount point : {2} fstype : {3}"
                            .format(device, used, mountpoint, fstype), True)
                        self.logger.log(
                            "Total Used Space: {0}".format(total_used), True)
                else:
                    self.logger.log(
                        "Adding Device name : {0} used space in KB : {1} mount point : {2} fstype : {3}"
                        .format(device, used, mountpoint, fstype), True)
                    total_used = total_used + int(used)  # in KB

                if is_unknown_fs:
                    total_used_unknown_fs = total_used_unknown_fs + int(used)

            index = index + 1

        # Report the per-category totals.
        if not len(unknown_fs_types) == 0:
            Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                "unknownFSTypeInDf", str(unknown_fs_types))
            Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                "totalUsedunknownFS", str(total_used_unknown_fs))
            self.logger.log(
                "Total used space in Bytes of unknown FSTypes : {0}".format(
                    total_used_unknown_fs * 1024), True)

        if total_used_temporary_disks != actual_temp_disk_used:
            self.logger.log(
                "Billing differenct because of incorrect temp disk: {0}".format(
                    str(total_used_temporary_disks - actual_temp_disk_used)))

        if not len(network_fs_types) == 0:
            Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                "networkFSTypeInDf", str(network_fs_types))
            Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                "totalUsedNetworkShare", str(total_used_network_shares))
            self.logger.log(
                "Total used space in Bytes of network shares : {0}".format(
                    total_used_network_shares * 1024), True)

        if total_used_gluster != 0:
            Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                "glusterFSSize", str(total_used_gluster))
        if total_used_temporary_disks != 0:
            Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                "tempDisksSize", str(total_used_temporary_disks))
        if total_used_ram_disks != 0:
            Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                "ramDisksSize", str(total_used_ram_disks))
        if total_used_loop_device != 0:
            Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                "loopDevicesSize", str(total_used_loop_device))
        self.logger.log(
            "Total used space in Bytes : {0}".format(total_used * 1024), True)
        if total_sd_size != 0:
            Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                "totalsdSize", str(total_sd_size))
        self.logger.log(
            "Total sd* used space in Bytes : {0}".format(total_sd_size * 1024),
            True)

        return total_used * 1024, size_calc_failed  # Converting into Bytes
    except Exception as e:
        errMsg = 'Unable to fetch total used space with error: %s, stack trace: %s' % (
            str(e), traceback.format_exc())
        self.logger.log(errMsg, True)
        size_calc_failed = True
        return 0, size_calc_failed
class FsFreezer:
    """Drive the external ``safefreeze`` binary to freeze and thaw mounts.

    The binary is started through ``FreezeHandler.startproc`` with the list
    of mount points to act on, and is told to thaw by sending ``SIGUSR1`` to
    the child process. A lock file under ``/etc/azure`` is held while the
    binary is being driven.
    """

    # Directory and file used as the lock taken while the binary runs.
    _LOCK_DIR = '/etc/azure/MicrosoftRecoverySvcsSafeFreezeLock'
    _LOCK_FILE = '/etc/azure/MicrosoftRecoverySvcsSafeFreezeLock/SafeFreezeLockFile'

    # Filesystem types that are eligible for freezing.
    _FREEZABLE_FSTYPES = ('ext3', 'ext4', 'xfs', 'btrfs')

    def __init__(self, patching, logger, hutil):
        """Wire up collaborators and reset the freeze bookkeeping state.

        Args:
            patching: Passed through to ``Mounts`` and ``ResourceDiskUtil``.
            logger: Object exposing ``log(msg, flag, level)`` and
                ``enforce_local_flag(flag)``.
            hutil: Provides ``get_strvalue_from_configfile``; also passed to
                ``FreezeHandler``.

        If ``Mounts`` cannot be built, the failure is logged and
        ``self.mounts`` is left as ``None`` instead of raising.
        """
        self.patching = patching
        self.logger = logger
        self.hutil = hutil
        try:
            self.mounts = Mounts(patching=self.patching, logger=self.logger)
        except Exception as e:
            errMsg = 'Failed to retrieve mount points, Exception %s, stack trace: %s' % (
                str(e), traceback.format_exc())
            self.logger.log(errMsg, True, 'Warning')
            self.logger.log(str(e), True)
            self.mounts = None
        self.frozen_items = set()
        self.unfrozen_items = set()
        self.freeze_handler = FreezeHandler(self.logger, self.hutil)
        self.mount_open_failed = False
        self.resource_disk = ResourceDiskUtil(patching=patching, logger=logger)
        self.skip_freeze = True
        self.isAquireLockSucceeded = True
        self.getLockRetry = 0
        self.maxGetLockRetry = 5

    def should_skip(self, mount):
        """Return True when ``mount`` must not be passed to the binary.

        A mount is skipped when it is the resource disk mount point, when
        its ``fstype`` is not one of ext3/ext4/xfs/btrfs, or when its
        ``type`` is ``'loop'``.
        """
        resource_disk_mount_point = self.resource_disk.get_resource_disk_mount_point()
        if (resource_disk_mount_point is not None
                and mount.mount_point == resource_disk_mount_point):
            return True
        if mount.fstype in self._FREEZABLE_FSTYPES and mount.type != 'loop':
            return False
        return True

    def _acquire_freeze_lock(self):
        """Try to take the exclusive, non-blocking lock on the lock file.

        Retries while ``self.getLockRetry < self.maxGetLockRetry``, sleeping
        between attempts, and records the outcome in
        ``self.isAquireLockSucceeded``.

        Returns:
            The open, locked file object on success; ``None`` otherwise.
        """
        while self.getLockRetry < self.maxGetLockRetry:
            try:
                if not os.path.isdir(self._LOCK_DIR):
                    os.mkdir(self._LOCK_DIR)
                lock_file = open(self._LOCK_FILE, "w")
                self.logger.log(
                    self._LOCK_FILE + " file opened Sucessfully", True)
                try:
                    fcntl.lockf(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
                    self.logger.log("Aquiring lock succeeded", True)
                    self.isAquireLockSucceeded = True
                    return lock_file
                except Exception:
                    # Do not leak the handle of a file we failed to lock.
                    lock_file.close()
                    raise
            except Exception as e:
                self.logger.log(
                    "Failed to open file or aquire lock: " + str(e), True)
                self.isAquireLockSucceeded = False
                self.getLockRetry = self.getLockRetry + 1
                time.sleep(1)
                if self.getLockRetry == self.maxGetLockRetry - 1:
                    # Longer pause before the final attempt.
                    time.sleep(30)
                self.logger.log(
                    "Retry to aquire lock count: " + str(self.getLockRetry),
                    True)
        return None

    def _release_freeze_lock(self, lock_file):
        """Best-effort unlock, close and removal of the lock file."""
        try:
            fcntl.lockf(lock_file, fcntl.LOCK_UN)
            lock_file.close()
        except Exception as e:
            self.logger.log(
                "Failed to release safe freeze lock: " + str(e), True)
        try:
            os.remove(self._LOCK_FILE)
        except Exception as e:
            self.logger.log(
                "Failed to remove safe freeze lock file: " + str(e), True)

    def freeze_safe(self, timeout):
        """Run the safefreeze binary over every mount that is not skipped.

        Mount points listed (comma separated) under the ``MountsToSkip``
        config key are left out, as is anything ``should_skip`` rejects.
        ``/`` is always appended last. When nothing is left to act on, the
        method returns early with ``self.skip_freeze`` still ``True``.

        Args:
            timeout: Passed to the binary as its first argument.

        Returns:
            tuple: ``(freeze_result, timedout)``. ``freeze_result.errors``
            receives a message when the value returned by ``startproc`` is
            not ``1``; ``timedout`` is ``True`` when that value is ``0``.
        """
        self.root_seen = False
        timedout = False
        self.skip_freeze = True
        mounts_to_skip = None
        # Stays empty when the config cannot be read, so that the mount loop
        # below does not fail on an unbound name.
        mounts_list_to_skip = []
        try:
            mounts_to_skip = self.hutil.get_strvalue_from_configfile(
                'MountsToSkip', '')
            self.logger.log("skipped mount :" + str(mounts_to_skip), True)
            mounts_list_to_skip = mounts_to_skip.split(',')
        except Exception as e:
            errMsg = 'Failed to read from config, Exception %s, stack trace: %s' % (
                str(e), traceback.format_exc())
            self.logger.log(errMsg, True, 'Warning')

        # Created before the try block so the handler can always append to it.
        freeze_result = FreezeResult()
        try:
            freezebin = os.path.join(os.getcwd(), os.path.dirname(__file__),
                                     "safefreeze/bin/safefreeze")
            args = [freezebin, str(timeout)]
            for mount in self.mounts.mounts:
                self.logger.log("fsfreeze mount :" + str(mount.mount_point),
                                True)
                if mount.mount_point == '/':
                    # Root is remembered here and appended after the loop.
                    self.root_seen = True
                    self.root_mount = mount
                elif (mount.mount_point not in mounts_list_to_skip
                      and not self.should_skip(mount)):
                    self.skip_freeze = False
                    args.append(str(mount.mount_point))
            if self.root_seen and not self.should_skip(self.root_mount):
                self.skip_freeze = False
                args.append('/')

            self.logger.log("skip freeze is : " + str(self.skip_freeze), True)
            if self.skip_freeze == True:
                return freeze_result, timedout

            self.logger.log("arg : " + str(args), True)
            self.freeze_handler.reset_signals()
            self.freeze_handler.signal_receiver()
            self.logger.log("proceeded for accepting signals", True)
            # for continue logging to avoid out of memory issue
            if mounts_to_skip == '/':
                self.logger.enforce_local_flag(True)
            else:
                self.logger.enforce_local_flag(False)

            start_time = datetime.datetime.utcnow()
            lock_file = self._acquire_freeze_lock()
            end_time = datetime.datetime.utcnow()
            self.logger.log(
                "Wait time to aquire lock " + str(end_time - start_time), True)

            sig_handle = None
            if self.isAquireLockSucceeded == True:
                try:
                    sig_handle = self.freeze_handler.startproc(args)
                    self.thaw_safe()
                finally:
                    # Release the lock even if the binary could not be driven.
                    self._release_freeze_lock(lock_file)

            self.logger.log(
                "freeze_safe after returning from startproc : sig_handle=" +
                str(sig_handle))
            if sig_handle != 1:
                if self.freeze_handler.child is not None:
                    self.log_binary_output()
                if sig_handle == 0:
                    timedout = True
                    error_msg = "freeze timed-out"
                elif self.mount_open_failed == True:
                    error_msg = CommonVariables.unable_to_open_err_string
                elif self.isAquireLockSucceeded == False:
                    error_msg = "Mount Points already freezed by some other processor"
                else:
                    error_msg = "freeze failed for some mount"
                freeze_result.errors.append(error_msg)
                self.logger.log(error_msg, True, 'Error')
        except Exception as e:
            self.logger.enforce_local_flag(True)
            error_msg = 'freeze failed for some mount with exception, Exception %s, stack trace: %s' % (
                str(e), traceback.format_exc())
            freeze_result.errors.append(error_msg)
            self.logger.log(error_msg, True, 'Error')
        return freeze_result, timedout

    def thaw_safe(self):
        """Signal the binary to thaw and collect its outcome.

        Returns:
            tuple: ``(thaw_result, unable_to_sleep)``. ``thaw_result.errors``
            receives a message when the child is missing, had already
            exited, or exits non-zero after the signal. ``unable_to_sleep``
            is ``True`` when an already-exited child returned code ``2``.
            Nothing is done when ``self.skip_freeze`` is ``True``.
        """
        thaw_result = FreezeResult()
        unable_to_sleep = False
        if self.skip_freeze == True:
            return thaw_result, unable_to_sleep

        if self.freeze_handler.child is None:
            self.logger.log("child already completed", True)
            self.logger.log(
                "****** 7. Error - Binary Process Already Completed", True)
            error_msg = 'snapshot result inconsistent'
            thaw_result.errors.append(error_msg)
        elif self.freeze_handler.child.poll() is None:
            self.logger.log("child process still running")
            self.logger.log("****** 7. Sending Thaw Signal to Binary")
            self.freeze_handler.child.send_signal(signal.SIGUSR1)
            # Give the child up to 30 seconds to exit after the signal.
            for _ in range(30):
                if self.freeze_handler.child.poll() is not None:
                    break
                self.logger.log("child still running sigusr1 sent")
                time.sleep(1)
            self.logger.enforce_local_flag(True)
            self.log_binary_output()
            if self.freeze_handler.child.returncode != 0:
                error_msg = 'snapshot result inconsistent as child returns with failure'
                thaw_result.errors.append(error_msg)
                self.logger.log(error_msg, True, 'Error')
        else:
            # The child exited before any thaw signal was sent.
            self.logger.log(
                "Binary output after process end when no thaw sent: ", True)
            if self.freeze_handler.child.returncode == 2:
                error_msg = 'Unable to execute sleep'
                thaw_result.errors.append(error_msg)
                unable_to_sleep = True
            else:
                error_msg = 'snapshot result inconsistent'
                thaw_result.errors.append(error_msg)
            self.logger.enforce_local_flag(True)
            self.log_binary_output()
            self.logger.log(error_msg, True, 'Error')
        self.logger.enforce_local_flag(True)
        return thaw_result, unable_to_sleep

    def log_binary_output(self):
        """Copy the child's stdout to the logger, line by line.

        Reading stops at the first empty read. ``self.mount_open_failed`` is
        set when any line contains ``"Failed to open:"``.
        """
        self.logger.log(
            "============== Binary output traces start ================= ",
            True)
        while True:
            line = self.freeze_handler.child.stdout.readline()
            if sys.version_info > (3, ):
                line = str(line, encoding='utf-8', errors="backslashreplace")
            else:
                line = str(line)
            if "Failed to open:" in line:
                self.mount_open_failed = True
            if line == '':
                break
            self.logger.log(line.rstrip(), True)
        self.logger.log(
            "============== Binary output traces end ================= ",
            True)