def send_daily_update_email():
    """Send the daily NAS status email to every configured alert address.

    Nothing is sent, and no drive is queried, unless the ``daily_update``
    notification flag in ``plot_manager_config`` is truthy and
    ``system_info.alert_email`` contains at least one address.

    All drive, SMART, space and plot figures are gathered once and reused
    for every recipient, so each recipient receives the same numbers and
    the drive is probed a single time.

    Raises:
        ZeroDivisionError: if the configured ``current_total_plots_daily``
            is 0 (``days_to_fill_drives`` divides by it).
    """
    if not read_config_data('plot_manager_config', 'notifications', 'daily_update', True):
        return
    recipients = system_info.alert_email
    if not recipients:
        return

    # Resolve the current plot drive once: entry[0] is what psutil is asked
    # about, entry[1] is what pySMART is asked about.
    plot_drive = get_plot_drive_to_use()
    drive_entry = get_device_by_mountpoint(plot_drive)[0]
    usage = psutil.disk_usage(drive_entry[0])
    device_path = drive_entry[1]
    smart_device = Device(device_path)

    space_used = get_all_available_system_space('used')
    space_total = get_all_available_system_space('total')
    space_free = get_all_available_system_space('free')
    chia_plots = check_plots()

    plots_last_day = read_config_data(
        'plot_manager_config', 'plotting_information', 'current_total_plots_daily', False)
    daily_plots = int(plots_last_day)
    # NOTE(review): a daily count of 0 raises here and no report is sent;
    # decide what the template should show in that case before guarding it.
    days_to_fill = int(space_free[1] / daily_plots)

    for email_address in recipients:
        send_template_email(
            template='daily_update.html',
            recipient=email_address,
            # The extra header line rides along inside the subject string;
            # presumably send_template_email writes it into the headers - confirm.
            subject='NAS Server Daily Update\nContent-Type: text/html',
            current_time=current_military_time,
            nas_server=nas_server,
            current_plotting_drive_by_mountpoint=plot_drive,
            current_plotting_drive_by_device=device_path,
            drive_size=bytes2human(usage.total),
            drive_serial_number=smart_device.serial,
            current_drive_temperature=smart_device.temperature,
            smart_health_assessment=smart_device.assessment,
            total_serverwide_plots=space_used[1],
            total_number_of_drives=space_total[0],
            total_k32_plots_until_full=space_free[1],
            max_number_of_plots=space_total[1],
            total_serverwide_plots_chia=chia_plots[0],
            total_serverwide_space_per_chia=chia_plots[1],
            total_plots_last_day=plots_last_day,
            days_to_fill_drives=days_to_fill,
            average_plots_per_hour=round(daily_plots / 24, 1),
            average_plotting_speed=(daily_plots * int(plot_size_g) / 1000),
        )
def send_new_plot_disk_email():
    """Email every alert address that a new plotting drive has been selected.

    Nothing is sent, and no drive is queried, unless the ``new_plot_drive``
    notification flag in ``plot_manager_config`` is truthy and
    ``system_info.alert_email`` contains at least one address.

    The "previous" drive is the ``current_plotting_drive`` value still stored
    in the config; the "current" drive is whatever ``get_plot_drive_to_use()``
    returns now. All figures are gathered once and shared by every recipient.

    Raises:
        ZeroDivisionError: if the configured ``current_total_plots_daily``
            is 0 (``days_to_fill_drives`` divides by it).
    """
    if not read_config_data('plot_manager_config', 'notifications', 'new_plot_drive', True):
        return
    recipients = system_info.alert_email
    if not recipients:
        return

    previous_drive = read_config_data(
        'plot_manager_config', 'plotting_drives', 'current_plotting_drive', False)

    # Resolve the newly selected drive once: entry[0] feeds psutil,
    # entry[1] feeds pySMART.
    plot_drive = get_plot_drive_to_use()
    drive_entry = get_device_by_mountpoint(plot_drive)[0]
    usage = psutil.disk_usage(drive_entry[0])
    device_path = drive_entry[1]
    smart_device = Device(device_path)

    space_used = get_all_available_system_space('used')
    space_total = get_all_available_system_space('total')
    space_free = get_all_available_system_space('free')
    chia_plots = check_plots()

    daily_plots = int(read_config_data(
        'plot_manager_config', 'plotting_information', 'current_total_plots_daily', False))
    # NOTE(review): a daily count of 0 raises here and no email is sent.
    days_to_fill = int(space_free[1] / daily_plots)

    plots_on_previous = get_drive_info('total_current_plots_by_mountpoint', previous_drive)
    plots_available = get_drive_info('space_free_plots_by_mountpoint', plot_drive)

    for email_address in recipients:
        send_template_email(
            template='new_plotting_drive.html',
            recipient=email_address,
            # The extra header line rides along inside the subject string;
            # presumably send_template_email writes it into the headers - confirm.
            subject='New Plotting Drive Selected\nContent-Type: text/html',
            current_time=current_military_time,
            nas_server=nas_server,
            previous_plotting_drive=previous_drive,
            plots_on_previous_plotting_drive=plots_on_previous,
            current_plotting_drive_by_mountpoint=plot_drive,
            current_plotting_drive_by_device=device_path,
            drive_size=bytes2human(usage.total),
            plots_available=plots_available,
            drive_serial_number=smart_device.serial,
            current_drive_temperature=smart_device.temperature,
            smart_health_assessment=smart_device.assessment,
            total_serverwide_plots=space_used[1],
            total_serverwide_plots_chia=chia_plots[0],
            total_serverwide_space_per_chia=chia_plots[1],
            total_number_of_drives=space_total[0],
            total_k32_plots_until_full=space_free[1],
            max_number_of_plots=space_total[1],
            days_to_fill_drives=days_to_fill,
        )
def update_device(folder, device_name, interface_name=None):
    """Dump a ``Device``'s properties to ``<folder>/device.json``.

    The device is built on top of a ``SmartctlFile(folder)``. The JSON
    document holds ``name``, ``interface`` (only when one was given) and
    ``values`` - the device's properties with the ``smartctl`` entry and
    every underscore-prefixed entry removed, and with ``attributes``,
    ``tests`` and ``diagnostics`` converted through
    ``get_object_properties``.

    Args:
        folder: directory handed to ``SmartctlFile`` and receiving
            ``device.json``.
        device_name: name passed to ``Device``.
        interface_name: optional interface passed to ``Device``.
    """
    smartctl_file = SmartctlFile(folder)
    document = {"name": device_name}

    device_kwargs = {'smartctl': smartctl_file}
    if interface_name is not None:
        device_kwargs['interface'] = interface_name
        document['interface'] = interface_name
    device = Device(device_name, **device_kwargs)

    values = get_object_properties(device, deep_copy=False)
    document['values'] = values

    # Drop what cannot be serialised: the smartctl handle and private members.
    doomed = ['smartctl'] + [key for key in values if key[0] == '_']
    for key in doomed:
        if key in values:
            del values[key]

    # Attribute and test lists may contain None placeholders; keep them as-is.
    for list_key in ('attributes', 'tests'):
        if list_key in values:
            values[list_key] = [
                None if item is None else get_object_properties(item)
                for item in values[list_key]
            ]

    # Properties that are converted directly.
    if 'diagnostics' in values:
        values['diagnostics'] = get_object_properties(values['diagnostics'])

    target = os.path.join(folder, 'device.json')
    with open(target, "w") as handle:
        handle.write(json.dumps(document, indent=4))
def create_device(self, folder: str, data) -> Device:
    """Build a ``Device`` backed by a ``SmartctlFile`` for ``folder``.

    Args:
        folder: directory handed to ``SmartctlFile``.
        data: mapping with a ``'name'`` entry and, optionally, an
            ``'interface'`` entry that is forwarded to ``Device``.

    Returns:
        The constructed ``Device``.
    """
    smartctl_file = SmartctlFile(folder)
    if 'interface' in data:
        return Device(data['name'], interface=data['interface'], smartctl=smartctl_file)
    return Device(data['name'], smartctl=smartctl_file)
def run(self, id, test_type):
    """Run a SMART self-test on a disk and refresh its cache entry.

    Args:
        id: disk identifier looked up with ``get_disk_config_by_id``.
        test_type: name of a ``SelfTestType`` member; its ``.value`` is
            handed to ``Device.run_selftest_and_wait``.

    Raises:
        TaskException: with ``errno.ENOENT`` when the lookup raises
            ``RpcException``.
    """
    try:
        diskinfo = self.dispatcher.call_sync("get_disk_config_by_id", id)
    except RpcException:
        raise TaskException(errno.ENOENT, 'Disk {0} not found'.format(id))

    device = Device(diskinfo['gdisk_name'])
    selftest_code = getattr(SelfTestType, test_type).value
    device.run_selftest_and_wait(selftest_code, progress_handler=self.handle_progress)

    # Refresh the cached view of the disk once the test has finished.
    self.dispatcher.call_sync(
        'disks.update_disk_cache', diskinfo['path'], timeout=120)
def make_snapshot():
    """Collect SMART data for every ATA device reported by ``smartctl --scan``.

    Returns:
        dict with ``'timestamp'`` (``str(datetime.utcnow())``) and
        ``'devices'``, a mapping of ``"<model> :: <serial>"`` to whatever
        ``get_device_smart`` returns for that device.
    """
    # Device paths are parsed out of smartctl's own scan output
    # (pySMART DeviceList is empty).
    scan = subprocess.run('smartctl.exe --scan', capture_output=True)
    scan_lines = scan.stdout.decode('utf-8').split('\r\n')

    # Keep ATA devices only; the device path is the first space-separated field.
    ata_paths = [line.split(' ')[0] for line in scan_lines
                 if line.endswith('ATA device')]
    devices = [Device(path) for path in ata_paths]

    # Gather SMART data for each device.
    smarts = {}
    for device in devices:
        label = f'{device.model} :: {device.serial}'
        readings = get_device_smart(device)
        smarts[label] = readings

    # Assemble and return the new snapshot.
    return {
        'timestamp': str(datetime.utcnow()),
        'devices': smarts,
    }
def info_from_device(devname):
    """Summarise the SMART identity of ``/dev/<devname>``.

    Returns:
        dict with the keys ``serial``, ``max_rotation``, ``smart_enabled``,
        ``smart_capable``, ``smart_status``, ``model``, ``is_ssd`` and
        ``interface``. Identity fields are filled only when the device is
        SMART capable, and ``smart_status`` only when SMART is also
        enabled; otherwise they keep their ``None``/``False`` defaults.
    """
    # TODO, fix this to deal with above generated args for interface
    device = Device(os.path.join('/dev/', devname))

    capable = device.smart_capable
    disk_info = {
        'serial': None,
        'max_rotation': None,
        'smart_enabled': False,
        'smart_capable': capable,
        'smart_status': None,
        'model': None,
        'is_ssd': device.is_ssd,
        'interface': None,
    }
    if not capable:
        return disk_info

    enabled = device.smart_enabled
    disk_info.update(
        serial=device.serial,
        model=device.model,
        max_rotation=device.rotation_rate,
        interface=device.interface,
        smart_enabled=enabled,
    )
    if enabled:
        disk_info['smart_status'] = device.assessment
    return disk_info
def smart(self, disk):
    """Build a text report for ``disk``: SMART identity plus partition usage.

    Args:
        disk: device path; passed to ``self.getDf`` and to ``Device``.

    Returns:
        str: the disk name, its serial/firmware/model/health/size, followed
        by one paragraph per partition row returned by ``self.getDf``.
    """
    partition_rows, row_count = self.getDf(disk)
    device = Device(disk)
    healthy = device.assessment == "PASS"

    summary = '''Serial -> {0}\n Firmware -> {1}\n Model -> {2} \nS.M.A.R.T. status is good: {3}\nDevice size: {4}'''.format(
        str(device.serial), str(device.firmware), str(device.model),
        str(healthy), str(device.capacity))
    pieces = [str(disk) + "\n\n" + summary]

    # Walk the non-empty rows.
    for index in range(row_count):
        # Turn the row into a list of its first six values.
        fields = partition_rows[index].split()[0:6]
        # Build the description of this partition.
        pieces.append(
            "\n\nPartition: {0}\nSize: {1}\nUsed: {2}\nAvail: {3}\nUse(%): {4}\nMount point: {5}\n\n".format(
                str(fields[0]), str(fields[1]), str(fields[2]),
                str(fields[3]), str(fields[4]), str(fields[5])))

    # Assemble the final answer.
    return "".join(pieces)
def add_system_metrics_nvme(args, messages):
    """Add system metrics for an optional NVMe drive (/dev/nvme0)

    A ``sys.thermal`` message carrying the drive temperature is appended
    when the device node exists; otherwise the drive is skipped with an
    info log. Any failure is logged and swallowed.

    Args:
        args: all program arguments
        messages: the message queue to append metric to
    """
    timestamp = time.time_ns()
    logging.info("collecting system metrics (NVMe)")

    nvmeroot = "/dev/nvme0"
    metric_meta = {"type": "nvme-therm", "zone": "none"}
    try:
        if not Path(nvmeroot).exists():
            logging.info("nvme (%s) not found. skipping...", nvmeroot)
            return
        nvmedev = Device("/dev/nvme0")
        reading = message.Message(
            name="sys.thermal",
            value=float(nvmedev.temperature),
            timestamp=timestamp,
            meta=metric_meta,
        )
        messages.append(reading)
    except Exception:
        logging.exception("failed to get nvme system metrics")
def run(context):
    """Generate ``/usr/local/etc/smartd.conf`` from the smartd service config.

    For every disk returned by the ``disk.query`` call the SMART enabled
    state is compared with the disk's ``'smart'`` setting and toggled when
    they differ. Disks whose toggle fails are logged and left out. Each
    disk with ``'smart'`` set gets one ``<path> -a <common options>`` line.
    An ``etcd.file_generated`` event is emitted after the file is written.
    """
    conf_path = "/usr/local/etc/smartd.conf"
    smartd_config = context.client.call_sync('service.smartd.get_config')

    power_mode = smartd_config['power_mode'].lower()
    # Unset temperature thresholds are written as 0.
    thresholds = [
        0 if smartd_config[key] is None else smartd_config[key]
        for key in ('temp_difference', 'temp_informational', 'temp_critical')
    ]
    smartd_common_line = "-n {0} -W {1},{2},{3} -m root\n".format(power_mode, *thresholds)

    # Get all SMART capable disk's info
    all_disks_info = context.client.call_sync(
        'disk.query', [('status.smart_capable', '=', True)]
    )

    conf_lines = []
    for single_disk in all_disks_info:
        handle = Device(single_disk['path'], abridged=True)

        # Bring the disk's SMART enabled state in line with the stored setting.
        if single_disk['smart'] != handle.smart_enabled:
            # toggle_result is (succeeded: True/False, error message on failure)
            toggle_result = handle.smart_toggle('on' if single_disk['smart'] else 'off')
            if not toggle_result[0]:
                # Log and move on: the remaining disks may still be configurable.
                context.logger.error(
                    "smartd_conf.py: {0} -> Tried to toggle disk's ".format(single_disk['path']) +
                    " SMART enabled to: {0} and failed with error: {1}".format(
                        single_disk['smart'], toggle_result[1]
                    )
                )
                continue

        if single_disk['smart']:
            conf_lines.append("{0} -a {1}".format(single_disk['path'], smartd_common_line))

    with open(conf_path, "w+") as conf_file:
        conf_file.writelines(conf_lines)

    context.emit_event('etcd.file_generated', {'filename': "/usr/local/etc/smartd.conf"})
def log_drive_report():
    """
    Logs a drive report of our newly selected plot drive

    Two lines go to ``log.info``: a header, then the device, total size,
    plots that still fit, serial number, temperature and mount point of the
    drive returned by ``get_plot_drive_to_use()``. The drive is resolved and
    probed once, so every column describes the same drive.
    """
    templ = "%-15s %6s %15s %12s %10s %5s"
    log.info(templ % ("New Plot Drive", "Size", "Avail Plots", "Serial #", "Temp °C", "Mount Point"))

    plot_drive = get_plot_drive_to_use()
    drive_entry = get_device_by_mountpoint(plot_drive)[0]
    mountpoint, device_path = drive_entry[0], drive_entry[1]
    usage = psutil.disk_usage(mountpoint)
    smart_device = Device(device_path)

    log.info(templ % (
        device_path,
        bytes2human(usage.total),
        get_drive_info('space_free_plots_by_mountpoint', plot_drive),
        smart_device.serial,
        smart_device.temperature,
        mountpoint))
def test_tester_smart(Device: pySMART.Device):
    """Check ``Tester.smart`` against a fully mocked, healthy device.

    The device reports one finished test (0% remaining, 24 hours, LBA 0),
    raw values 99 and '11' for attributes 9 and 12, and a PASS assessment.
    """
    finished_test = MagicMock()
    finished_test.remain = '0%'
    finished_test.hours = '24'
    finished_test.LBA = '0'
    finished_test.type = 'foo-type'
    finished_test.status = 'foo-status'

    # Sparse attribute table: only slots 9 and 12 are populated.
    attribute_table = [None] * 256
    for slot, raw_value in ((9, 99), (12, '11')):
        attribute_table[slot] = MagicMock()
        attribute_table[slot].raw = raw_value

    Device.run_selftest = MagicMock(return_value=(0, None, 3))
    Device.update = MagicMock()
    Device.tests = [finished_test]
    Device.attributes = attribute_table
    Device.assessment = 'PASS'

    expected = {
        'lifetime': 24,
        '@type': 'TestHardDrive',
        'error': False,
        'type': 'foo-type',
        'status': 'foo-status',
        'firstError': 0,
        'passedLifetime': 99,
        'assessment': True,
        'powerCycleCount': 11
    }
    r = Tester.smart('/foo/bar', test_type=Smart.short)
    assert r == expected
def hard_drive(self, node, get_removable=False) -> dict or None:
    """Describe one disk node as a ``HardDrive`` dict, or skip it.

    The bus is read from ``udevadm info`` (``ID_BUS``). A disk counts as
    removable when that bus is ``usb``, and as not removable when the bus
    is not ``usb`` and the node lacks the ``capabilities.removable`` flag.

    Args:
        node: disk node providing at least ``logicalname``, ``size`` and
            ``units``.
        get_removable: when true only removable disks are described,
            otherwise only non-removable ones.

    Returns:
        The drive description, or ``None`` when the disk does not match
        ``get_removable``.

    Raises:
        AssertionError: if the size in MB is outside ``(10000, 10**8)``.
    """
    logical_name = node['logicalname']
    # The pipeline output ends with a newline; without strip() the value is
    # e.g. 'usb\n', which never equals 'usb' and would leak into the result.
    interface = run('udevadm info '
                    '--query=all '
                    '--name={} | '
                    'grep '
                    'ID_BUS | '
                    'cut -c 11-'.format(logical_name),
                    check=True,
                    universal_newlines=True,
                    shell=True,
                    stdout=PIPE).stdout.strip()
    # todo not sure if ``interface != usb`` is needed
    is_not_removable = interface != 'usb' and not get(node, 'capabilities.removable')
    is_removable = interface == 'usb'
    if not (get_removable and is_removable or not get_removable and is_not_removable):
        return None

    hdd = {
        '@type': 'HardDrive',
        'size': floor(utils.convert_capacity(node['size'], node['units'], 'MB')),
        'interface': interface,
        PrivateFields.logical_name: logical_name
    }
    # Any warning raised while probing SMART is turned into an exception and
    # suppressed, which leaves 'type' unset for that drive.
    with catch_warnings():
        filterwarnings('error')
        with suppress(Warning):
            hdd['type'] = 'SSD' if Device(logical_name).is_ssd else 'HDD'
    assert 10000 < hdd['size'] < 10**8, 'Invalid HDD size {} MB'.format(hdd['size'])
    if self.benchmarker:
        hdd['benchmark'] = self.benchmarker.benchmark_hdd(logical_name)
    # Entries from self._common(node) override the ones collected above.
    hdd = dict(hdd, **self._common(node))
    # Fall back to SMART data for identity fields the node left empty.
    if not hdd['serialNumber']:
        hdd['serialNumber'] = Device(hdd[PrivateFields.logical_name]).serial
    if not hdd['model']:
        hdd['model'] = Device(hdd[PrivateFields.logical_name]).model
    return hdd
def run(context):
    """Generate ``/usr/local/etc/smartd.conf`` from the smartd service config.

    For every disk returned by the ``disk.query`` call the SMART enabled
    state is compared with the disk's ``'smart'`` setting and toggled when
    they differ. Disks whose toggle fails are logged and left out. Each
    disk with ``'smart'`` set gets one ``<path> -a <common options>`` line.
    An ``etcd.file_generated`` event is emitted after the file is written.
    """
    conf_path = "/usr/local/etc/smartd.conf"
    smartd_config = context.client.call_sync('service.smartd.get_config')

    power_mode = smartd_config['power_mode'].lower()
    # Unset temperature thresholds are written as 0.
    thresholds = [
        0 if smartd_config[key] is None else smartd_config[key]
        for key in ('temp_difference', 'temp_informational', 'temp_critical')
    ]
    smartd_common_line = "-n {0} -W {1},{2},{3} -m root\n".format(power_mode, *thresholds)

    # Get all SMART capable disk's info
    all_disks_info = context.client.call_sync(
        'disk.query', [('status.smart_info.smart_capable', '=', True)])

    conf_lines = []
    for single_disk in all_disks_info:
        handle = Device(single_disk['path'], abridged=True)

        # Bring the disk's SMART enabled state in line with the stored setting.
        if single_disk['smart'] != handle.smart_enabled:
            # toggle_result is (succeeded: True/False, error message on failure)
            toggle_result = handle.smart_toggle('on' if single_disk['smart'] else 'off')
            if not toggle_result[0]:
                # Log and move on: the remaining disks may still be configurable.
                context.logger.error(
                    "smartd_conf.py: {0} -> Tried to toggle disk's ".format(single_disk['path']) +
                    " SMART enabled to: {0} and failed with error: {1}".format(
                        single_disk['smart'], toggle_result[1]))
                continue

        if single_disk['smart']:
            conf_lines.append("{0} -a {1}".format(single_disk['path'], smartd_common_line))

    with open(conf_path, "w+") as conf_file:
        conf_file.writelines(conf_lines)

    context.emit_event('etcd.file_generated', {'filename': "/usr/local/etc/smartd.conf"})
def get(self, disk):
    """Return the raw SMART attribute values of ``/dev/<disk>``.

    Args:
        disk: device name without the ``/dev/`` prefix.

    Returns:
        dict mapping attribute name to raw value for every non-empty
        attribute slot, or ``{"error": True}`` when the data cannot be read.
    """
    try:
        raw_info = Device('/dev/' + disk)
        return {
            smart_value.name: smart_value.raw
            for smart_value in raw_info.attributes
            if smart_value
        }
    except Exception:
        # Best effort: report and signal the failure to the caller. The
        # parenthesised form works on both Python 2 and Python 3.
        print("[SMART] Unable to fetch SMART data for device: %s" % disk)
        return {"error": True}
def get_drive_info(action, drive):
    """
    This allows us to query specific information about our drives including
    temperatures, smart assessments, and space available to use for plots.
    It allows us to simply hand it a drive number (drive0, drive22, etc)
    and will present us with the data back. This utilizes pySMART, but
    a word of caution, use the TrueNAS versions linked to above, the PyPI
    version has a bug!

    Args:
        action: one of ``device``, ``temperature``, ``capacity``,
            ``health``, ``name``, ``serial``, ``space_total``,
            ``space_used``, ``space_free``, ``space_free_plots``,
            ``total_current_plots``, ``space_free_plots_by_mountpoint`` or
            ``total_current_plots_by_mountpoint``.
        drive: a drive number for most actions; for the two
            ``*_by_mountpoint`` actions it is passed straight to
            ``shutil.disk_usage``.

    Returns:
        The requested value, or ``None`` for an unknown action or when the
        ``device`` action finds no matching plot drive.
    """
    # SMART-backed actions: action name -> pySMART Device attribute.
    smart_attributes = {
        'temperature': 'temperature',
        'capacity': 'capacity',
        'health': 'assessment',
        'name': 'name',
        'serial': 'serial',
    }
    # Space actions: action name -> index into shutil.disk_usage()'s
    # (total, used, free) result.
    usage_fields = {'space_total': 0, 'space_used': 1, 'space_free': 2}

    if action == 'device':
        plot_drives = get_list_of_plot_drives()
        if not plot_drives:
            return None
        # Resolve the mountpoint once instead of once per candidate drive.
        mountpoint = get_mountpoint_by_drive_number(drive)[0]
        matches = [hd for hd in plot_drives if hd[0] == mountpoint]
        return matches[0][1] if matches else None

    if action in smart_attributes:
        smart_device = Device(get_device_info_by_drive_number(drive)[0][1])
        return getattr(smart_device, smart_attributes[action])

    if action in usage_fields:
        usage = shutil.disk_usage(get_device_info_by_drive_number(drive)[0][0])
        return int(bytesto(usage[usage_fields[action]], 'g'))

    if action == 'space_free_plots':
        usage = shutil.disk_usage(get_device_info_by_drive_number(drive)[0][0])
        return int(bytesto(usage[2], 'g') / plot_size_g)
    if action == 'space_free_plots_by_mountpoint':
        return int(bytesto(shutil.disk_usage(drive)[2], 'g') / plot_size_g)
    if action == 'total_current_plots':
        usage = shutil.disk_usage(get_mountpoint_by_drive_number(drive)[0])
        return int(bytesto(usage[1], 'g') / plot_size_g)
    if action == 'total_current_plots_by_mountpoint':
        return int(bytesto(shutil.disk_usage(drive)[1], 'g') / plot_size_g)

    return None
def health_check(self):
    """Report the combined SMART health of every ``/dev/sd<letters>`` disk.

    Only whole-disk nodes are considered: names that start with ``sd`` and
    are purely alphabetic, which leaves out partitions such as ``sda1``.

    Returns:
        ``"SMART status is good"`` when every disk's assessment is
        ``"PASS"``, ``"SMART status is not good"`` as soon as one is not,
        and ``None`` when no matching disk exists.
    """
    disks = sorted(
        name for name in os.listdir('/dev')
        if name.startswith('sd') and name.isalpha()
    )
    if not disks:
        return None

    # Every disk has to pass; previously only the first one was inspected.
    for name in disks:
        if Device('/dev/' + name).assessment != "PASS":
            return "SMART status is not good"
    return "SMART status is good"
def test_tester_no_smart(Device: pySMART.Device):
    """
    Tests the smart tester with a hard-drive
    that doesn't support SMART.
    """

    def warn_on_init(_):
        # Constructing the device emits a warning instead of succeeding.
        warn('')

    Device.__init__ = warn_on_init

    expected = {
        'error': True,
        'status': 'SMART cannot be enabled on this device.',
        '@type': 'TestHardDrive'
    }
    r = Tester.smart('/foo/bar', test_type=Smart.short)
    assert r == expected
# NOTE(review): this statement looks like the tail of a definition that
# starts outside the visible code; it is kept verbatim.
print(message.sid)


def physical_drives():
    """Return the names of the block devices that have a ``device`` entry in sysfs."""
    drive_glob = '/sys/block/*/device'
    return [basename(dirname(d)) for d in glob(drive_glob)]


# Point out drives that exist on the system but are not being monitored.
for disk in physical_drives():
    if disk not in config['disks']:
        print(
            disk +
            ' found on system. Consider adding it to config.yaml to monitor it'
        )

# Check every configured drive and collect the problems into the SMS body.
for disk in config['disks']:
    smart_status = Device('/dev/' + disk)
    disk_string = 'Disk ' + disk
    print(smart_status.assessment)
    if smart_status.assessment is not None:
        # Compare for equality: ``not in 'PASS'`` is a substring test and
        # would accept values such as 'PA' or ''.
        if smart_status.assessment != 'PASS':
            print(disk_string + ' failed smart test')
            # Newline-terminated like the message below, so several
            # reports do not run together in one SMS.
            sms_body = sms_body + disk_string + ' failed smart test\n'
    else:
        # No assessment at all is reported as a missing disk.
        print(disk_string + " doesn't exist")
        sms_body = sms_body + disk_string + ' not found on system. Could indicate broken disk\n'

# Only send when there is something to report and debug mode is off.
if sms_body and config['debug']['enabled'] is not True:
    send_sms(sms_body)
#
# Authors: Stefan Kauerauf

import argparse
import os
import re
import sys

from pySMART import Device

# Command-line interface: device to inspect, a percentage setting and two
# output switches.
argp = argparse.ArgumentParser(description=__doc__)
argp.add_argument('-D', '--device', default='/dev/sda')
# NOTE(review): no ``type=`` is given, so a value passed on the command line
# arrives as a string while the default is an int - confirm how it is used.
argp.add_argument('-P', '--percent', default=5)
argp.add_argument('--html', default=False, action="store_true")
argp.add_argument('--noperf', default=False, action="store_true")
args = argp.parse_args()

device = Device(args.device)

# Keep only the attribute slots that are populated.
attributes = [a for a in device.attributes if a]

# fill table
# name, type, value, worst, warning, critical, raw, state, thresh
result = []
count_warning = 0
# Anything other than an overall PASS counts as one critical finding.
count_critical = 1 if device.assessment != 'PASS' else 0
def smart(cls, disk: str, test_type: Smart) -> dict:
    """Run a SMART self-test on ``disk`` and report the outcome.

    The test is started, then the device is polled every two seconds until
    the last test reports 0% remaining or the estimated end time plus
    ``cls.SMART_GRACE_TIME`` has passed.

    Args:
        disk: device path handed to ``Device``.
        test_type: ``Smart`` member; its ``.value`` is passed to
            ``run_selftest`` and ``Smart.short`` selects the shorter
            fallback duration.

    Returns:
        A ``'@type': 'TestHardDrive'`` dict. On failure it only carries
        ``error`` (True) and ``status``. On success it carries ``type``,
        ``error``, ``status``, ``firstError``, ``passedLifetime`` and
        ``assessment``, plus ``lifetime`` when the test's hours parse as an
        int and one entry per ``cls.SMART_ATTRIBUTES`` item whose attribute
        is present.
    """
    # Enable SMART on hard drive
    # Warnings are promoted to exceptions so that a warning raised while
    # constructing the Device is caught below and reported as an error result.
    with catch_warnings():
        filterwarnings('error')
        try:
            hdd = Device(disk)  # type: Device
        except Warning:
            status = 'SMART cannot be enabled on this device.'
            print(status, file=sys.stderr)
            return {
                '@type': 'TestHardDrive',
                'error': True,
                'status': status
            }
    status_code, status_message, completion_time = hdd.run_selftest(
        test_type.value)
    # NOTE(review): codes above 1 are treated as "test could not be started";
    # confirm the meaning of 0 and 1 against pySMART.
    if status_code > 1:
        print(status_message, file=sys.stderr)
        return {
            '@type': 'TestHardDrive',
            'error': True,
            'status': status_message,
        }
    # get estimated end of the test
    try:
        test_end = parser.parse(completion_time)
    except TypeError:  # completion_time is None, estimate end time
        # Fallback estimate: 2 minutes for a short test, 120 for any other.
        duration = 2 if test_type == Smart.short else 120
        test_end = datetime.now() + timedelta(minutes=duration)
    print('             It will finish around {}:'.format(test_end))

    # follow progress of test until it ends or the estimated time is reached
    remaining = 100  # test completion pending percentage
    with tqdm(total=remaining, leave=True) as bar:
        while remaining > 0:
            sleep(2)  # wait a few seconds between smart retrievals
            hdd.update()
            try:
                last_test = hdd.tests[0]
            except (TypeError, IndexError):
                pass
                # The suppress: test is None, no tests
                # work around because SMART has not been initialized
                # yet but pySMART library doesn't wait
                # Just ignore the error because we already have an
                # estimation of the ending time
            else:
                last = remaining
                # A 'remain' value that is not a percentage leaves the
                # previous figure in place.
                with suppress(ValueError):
                    remaining = int(last_test.remain.strip('%'))
                completed = last - remaining
                if completed > 0:
                    bar.update(completed)
            # only allow a few seconds more than the estimated time
            if datetime.now() > test_end + cls.SMART_GRACE_TIME:
                break
    # show last test
    hdd.update()
    # NOTE(review): unlike inside the loop, a missing or empty test list is
    # not handled here and would raise TypeError/IndexError.
    last_test = hdd.tests[0]
    try:
        lba_first_error = int(last_test.LBA, 0)  # accept hex and decimal value
    except ValueError:
        lba_first_error = None
    ret = {
        '@type': 'TestHardDrive',
        'type': last_test.type,
        # False both when no LBA could be parsed and when the LBA is 0.
        'error': bool(lba_first_error),
        'status': last_test.status,
        'firstError': lba_first_error,
        # Raw value of attribute 9 - presumably power-on hours; confirm.
        'passedLifetime': int(hdd.attributes[9].raw),
        'assessment': True if hdd.assessment == 'PASS' else False
    }
    with suppress(ValueError):
        ret['lifetime'] = int(last_test.hours)
    for key, name in cls.SMART_ATTRIBUTES.items():
        # Empty attribute slots have no ``raw``, so they are skipped.
        with suppress(AttributeError):
            ret[name] = int(hdd.attributes[key].raw)
    return ret
def get_current_plot_drive_info():
    """
    Designed for debugging and logging purposes when we switch drives

    Returns the temperature pySMART reports for the device behind the
    drive returned by ``get_plot_drive_to_use()``.
    """
    plot_drive = get_plot_drive_to_use()
    device_path = get_device_by_mountpoint(plot_drive)[0][1]
    smart_device = Device(device_path)
    return smart_device.temperature