def main():
    """Check that swift owns its relevant files and directories.

    Returns a list of two metrics: ``<module>.config`` followed by
    ``<module>.data``. Each is ``Severity.ok`` when its result list stayed
    empty and ``Severity.fail`` otherwise.
    """
    # Check /etc/swift
    config_failures = []
    not_swift_owned_config(config_failures)
    empty_files(config_failures)

    # Check files under /srv/node
    data_failures = []
    not_swift_owned_data(data_failures)

    # Only the *first* failure in each category feeds the message of the
    # failing metric. If there are several failures, the user will have to
    # resolve them one by one.
    report = []
    for suffix, failures in (('.config', config_failures),
                             ('.data', data_failures)):
        if failures:
            metric = MetricData.single(__name__ + suffix,
                                       Severity.fail,
                                       message='{message}',
                                       msgkeys=failures[0])
        else:
            metric = MetricData.single(__name__ + suffix,
                                       Severity.ok,
                                       message='OK')
        report.append(metric)
    return report
def test_get_logical_drive_info_failures(self):
    """Check the metrics produced when the LUN or the cache has failed.

    Each scenario feeds canned hpssacli output through a mocked ``run_cmd``
    and expects exactly two metrics back: one for ``lun_status`` and one
    for ``cache_status``. Only the sub-component named by the scenario is
    expected to carry ``Severity.fail``.
    """
    metric_name = hpssacli.__name__ + ".logical_drive"
    scenarios = (
        (LOGICAL_DRIVE_LUN_FAIL, "lun_status"),
        (LOGICAL_DRIVE_CACHE_FAIL, "cache_status"),
    )
    slot = "1"

    def healthy(sub_component):
        # Baseline expectation: status OK and caching enabled.
        return MetricData.single(
            metric_name,
            Severity.ok,
            "OK",
            {
                "component": "logical_drive",
                "logical_drive": "L",
                "sub_component": sub_component,
                "caching": "Enabled",
                "status": "OK",
            },
        )

    for raw_output, failed_component in scenarios:
        fake_run_cmd = mock.Mock(return_value=CommandResult(0, raw_output))
        with mock.patch("swiftlm.hp_hardware.hpssacli.run_cmd", fake_run_cmd):
            remaining = hpssacli.get_logical_drive_info(slot)

        lun_metric = healthy("lun_status")
        cache_metric = healthy("cache_status")
        lun_failed = lun_metric["sub_component"] == failed_component
        cache_failed = cache_metric["sub_component"] == failed_component

        # The LUN metric fails on a LUN fault, but its "caching" dimension
        # still mirrors the cache state.
        if lun_failed:
            lun_metric.value = Severity.fail
            lun_metric["status"] = "Fail"
            lun_metric._message = hpssacli.BASE_RESULT.messages["l_drive"]
        if cache_failed:
            lun_metric["caching"] = "Disabled"
        remaining = self.check_metrics(lun_metric, remaining)

        # The cache metric fails on a cache fault, but its "status"
        # dimension still mirrors the LUN state.
        if cache_failed:
            cache_metric.value = Severity.fail
            cache_metric["caching"] = "Disabled"
            cache_metric._message = hpssacli.BASE_RESULT.messages["l_cache"]
        if lun_failed:
            cache_metric["status"] = "Fail"
        remaining = self.check_metrics(cache_metric, remaining)

        self.assertFalse(remaining, "Got more metrics than expected")
def test_get_logical_drive_info_failures(self):
    """Exercise get_logical_drive_info() with failing LUN and cache data.

    For every data set, the LUN-status and cache-status metrics are each
    matched against the parser output, and nothing else may be left over.
    """
    name = hpssacli.__name__ + '.logical_drive'
    common_dimensions = {
        'component': 'logical_drive',
        'logical_drive': 'L',
        'caching': 'Enabled',
        'status': "OK",
    }
    cases = [
        (LOGICAL_DRIVE_LUN_FAIL, 'lun_status'),
        (LOGICAL_DRIVE_CACHE_FAIL, 'cache_status'),
    ]
    test_slot = "1"

    for canned_output, failed_component in cases:
        command_stub = mock.Mock()
        command_stub.return_value = CommandResult(0, canned_output)
        with mock.patch('swiftlm.hp_hardware.hpssacli.run_cmd',
                        command_stub):
            leftover = hpssacli.get_logical_drive_info(test_slot)

        # Start from two all-OK expectations; each gets its own dict.
        want_lun = MetricData.single(
            name, Severity.ok, 'OK',
            dict(common_dimensions, sub_component='lun_status'))
        want_cache = MetricData.single(
            name, Severity.ok, 'OK',
            dict(common_dimensions, sub_component='cache_status'))

        lun_is_bad = want_lun['sub_component'] == failed_component
        cache_is_bad = want_cache['sub_component'] == failed_component

        if lun_is_bad:
            want_lun.value = Severity.fail
            want_lun['status'] = 'Fail'
            want_lun._message = hpssacli.BASE_RESULT.messages['l_drive']
        if cache_is_bad:
            # The LUN metric also reports the cache state as a dimension.
            want_lun['caching'] = 'Disabled'

        leftover = self.check_metrics(want_lun, leftover)

        if cache_is_bad:
            want_cache.value = Severity.fail
            want_cache['caching'] = 'Disabled'
            want_cache._message = hpssacli.BASE_RESULT.messages['l_cache']
        if lun_is_bad:
            # The cache metric also reports the LUN state as a dimension.
            want_cache['status'] = 'Fail'

        leftover = self.check_metrics(want_cache, leftover)

        self.assertFalse(leftover, 'Got more metrics than expected')
def test_get_logical_drive_info(self):
    """Normal and bugged hpssacli output must give exactly the same metrics.

    Both data sets are parsed through a mocked ``run_cmd``. The results are
    compared against each other and then against the two expected metrics
    (LUN status and cache status).
    """
    test_slot = "1"

    def parse(output):
        # Run the parser against canned command output.
        mock_command = mock.Mock()
        mock_command.return_value = CommandResult(0, output)
        with mock.patch("swiftlm.hp_hardware.hpssacli.run_cmd", mock_command):
            return hpssacli.get_logical_drive_info(test_slot)

    # Exactly two metrics are expected: both are matched individually
    # below and nothing may remain afterwards. The former
    # ``assertTrue(len(data), 3)`` treated 3 as the failure message and
    # therefore only checked that the list was non-empty.
    data_1 = parse(LOGICAL_DRIVE_DATA)
    self.assertIsInstance(data_1, list)
    self.assertEqual(len(data_1), 2)

    data_2 = parse(LOGICAL_DRIVE_DATA_BUGGED)
    self.assertIsInstance(data_2, list)
    self.assertEqual(len(data_2), 2)

    # Check the data is the same for both
    for d in data_1:
        data_2 = self.check_metrics(d, data_2)

    # Check data is as expected.
    expected_lun = MetricData.single(
        hpssacli.__name__ + ".logical_drive",
        Severity.ok,
        "OK",
        {
            "component": "logical_drive",
            "sub_component": "lun_status",
            "status": "OK",
            "logical_drive": "L",
            "caching": "Enabled",
        },
    )
    data_1 = self.check_metrics(expected_lun, data_1)

    expected_cache = MetricData.single(
        hpssacli.__name__ + ".logical_drive",
        Severity.ok,
        "OK",
        {
            "component": "logical_drive",
            "sub_component": "cache_status",
            "status": "OK",
            "logical_drive": "L",
            "caching": "Enabled",
        },
    )
    data_1 = self.check_metrics(expected_cache, data_1)

    # A space was missing between "with" and the data set name.
    self.assertFalse(
        data_1, "Got more metrics than expected with LOGICAL_DRIVE_DATA")
    self.assertFalse(
        data_2,
        "Got more metrics than expected with LOGICAL_DRIVE_DATA_BUGGED")
def test_response_child(self):
    """child() keeps the parent's dimensions/messages and adds new ones."""
    parent = MetricData(name='name', messages={'a': 'b'})
    parent['test'] = 'test'

    # A child created with extra dimensions sees both old and new keys.
    extended = parent.child(dimensions={'test2': 'test2'})
    self.assertIn('test', extended)
    self.assertIn('test2', extended)
    self.assertDictEqual({'a': 'b'}, extended.messages)
    self.assertEqual('swiftlm.name', extended.name)

    # A plain child only sees the parent's dimensions.
    plain = parent.child()
    self.assertIn('test', plain)
    self.assertNotIn('test2', plain)
def test_child_msgkeys(self):
    """A child's message is formatted from both dimensions and msgkeys."""
    templates = {
        'ok': 'test message',
        'test': 'test with meta {test_value} and {test_value2}',
    }
    parent = MetricData(name='name', messages=templates)

    child = parent.child(dimensions={'test_value': '123'},
                         msgkeys={'test_value2': '456'})
    child.message = 'test'

    self.assertEqual('test with meta 123 and 456', str(child))
def main():
    """Run every selected check and hand the metrics to the formatter.

    A check may return a single MetricData or a non-empty list whose first
    item is a MetricData; anything else is ignored. Any exception raised by
    a check is converted into a failing ``check.failure`` metric carrying
    the flattened traceback, so one broken check does not stop the rest.
    """
    args = parse_args()
    collected = []

    for check in args.selected:
        try:
            outcome = check()
            if (isinstance(outcome, list) and outcome
                    and isinstance(outcome[0], MetricData)):
                collected.extend([item.metric() for item in outcome])
            elif isinstance(outcome, MetricData):
                collected.append(outcome.metric())
        except:  # noqa
            # Deliberately catch everything: report the failure as a metric.
            trace = ' '.join(traceback.format_exception(*sys.exc_info()))
            failure = MetricData.single(
                'check.failure',
                Severity.fail,
                '{check} failed with: {error}',
                {
                    'check': str(check),
                    'error': trace.replace('\n', ' '),
                    'component': 'swiftlm-scan',
                    'service': 'object-storage'
                })
            collected.append(failure.metric())

    FORMATS[args.format](collected, args.pretty)
def test_create_metricdata(self):
    """A new MetricData has a prefixed name, no message and no value."""
    metric = MetricData(name='name', messages={})

    self.assertEqual('swiftlm.name', metric.name)
    self.assertEqual('', metric.message)
    self.assertEqual(None, metric.value)
    # The hostname dimension is present without being supplied here.
    self.assertIn('hostname', metric.dimensions)
def main():
    """Execute the selected checks and format the resulting metrics.

    Results are flattened into a list of ``metric()`` values. A check that
    raises is reported as a ``check.failure`` metric with
    ``Severity.fail`` instead of aborting the scan.
    """
    args = parse_args()
    metrics = []

    def failure_metric(func, backtrace):
        # Build the metric describing a check that blew up.
        dimensions = {'check': str(func),
                      'error': backtrace.replace('\n', ' '),
                      'component': 'swiftlm-scan',
                      'service': 'object-storage'}
        return MetricData.single('check.failure', Severity.fail,
                                 '{check} failed with: {error}',
                                 dimensions)

    for func in args.selected:
        try:
            returned = func()
            is_metric_list = (isinstance(returned, list) and returned and
                              isinstance(returned[0], MetricData))
            if is_metric_list:
                metrics.extend([entry.metric() for entry in returned])
            elif isinstance(returned, MetricData):
                metrics.append(returned.metric())
        except:  # noqa
            exc_type, exc_value, exc_tb = sys.exc_info()
            formatted = traceback.format_exception(exc_type, exc_value,
                                                   exc_tb)
            metrics.append(failure_metric(func, ' '.join(formatted)).metric())

    FORMATS[args.format](metrics, args.pretty)
def test_metrics_dirs_not_exist(self):
    """Expect failing config and data metrics when no paths were created.

    The message text is not pinned ('dummy'); the comparison is delegated
    to ``metrics_are_similar``, which returns an empty string on success.
    """
    expected = [
        MetricData.single('swiftlm.swift.file_ownership.config',
                          Severity.fail, message='dummy'),
        MetricData.single('swiftlm.swift.file_ownership.data',
                          Severity.fail, message='dummy')
    ]

    with mock.patch('swiftlm.swift.file_ownership.server_type',
                    lambda x: x == ServerType.object):
        with mock.patch('pwd.getpwuid') as mock_pwuid:
            mock_pwuid.return_value = mock.Mock(pw_name='swift')
            results = FO.main()

    # Assert on what main() produced. Measuring `expected` (a literal
    # two-item list) could never fail.
    self.assertEqual(len(results), 2)
    same = metrics_are_similar(results, expected)
    self.assertEqual('', same, msg=same)
def test_get_logical_drive_info(self):
    """Normal output and bugged output must give exactly the same results.

    Both canned data sets are parsed through a mocked ``run_cmd``,
    cross-checked against each other and then matched against the expected
    LUN-status and cache-status metrics.
    """
    test_slot = "1"
    metric_name = hpssacli.__name__ + '.logical_drive'

    def drive_info_for(output):
        # Parse canned hpssacli output with run_cmd patched out.
        mock_command = mock.Mock()
        mock_command.return_value = CommandResult(0, output)
        with mock.patch('swiftlm.hp_hardware.hpssacli.run_cmd',
                        mock_command):
            return hpssacli.get_logical_drive_info(test_slot)

    # Two metrics are expected (they are consumed one by one below until
    # nothing is left). ``assertTrue(len(data), 3)`` used 3 as the failure
    # message, so it only ever verified that the list was non-empty.
    data_1 = drive_info_for(LOGICAL_DRIVE_DATA)
    self.assertIsInstance(data_1, list)
    self.assertEqual(len(data_1), 2)

    data_2 = drive_info_for(LOGICAL_DRIVE_DATA_BUGGED)
    self.assertIsInstance(data_2, list)
    self.assertEqual(len(data_2), 2)

    # Check the data is the same for both
    for d in data_1:
        data_2 = self.check_metrics(d, data_2)

    # Check data is as expected.
    expected_lun = MetricData.single(
        metric_name, Severity.ok, 'OK',
        {'component': 'logical_drive',
         'sub_component': 'lun_status',
         'status': "OK",
         'logical_drive': 'L',
         'caching': 'Enabled'})
    data_1 = self.check_metrics(expected_lun, data_1)

    expected_cache = MetricData.single(
        metric_name, Severity.ok, 'OK',
        {'component': 'logical_drive',
         'sub_component': 'cache_status',
         'status': "OK",
         'logical_drive': 'L',
         'caching': 'Enabled'})
    data_1 = self.check_metrics(expected_cache, data_1)

    # The implicit string concatenation used to drop the space after
    # "with".
    self.assertFalse(data_1, 'Got more metrics than expected with '
                     'LOGICAL_DRIVE_DATA')
    self.assertFalse(data_2, 'Got more metrics than expected with '
                     'LOGICAL_DRIVE_DATA_BUGGED')
def test_dict_behaviour(self):
    """Dimensions can be set, read back and deleted with dict syntax."""
    metric = MetricData(name='name', messages={})

    metric['test'] = 1000
    # Dimension values must be strings, so the integer stored above is
    # expected to come back converted.
    self.assertEqual('1000', metric['test'])

    del metric['test']
    self.assertNotIn('test', metric)
def test_details_ok(self):
    """Stratum and offset found in the command output become two metrics."""
    fake_run_cmd = Mock(return_value=CommandResult(0, 'stratum=1,offset=2,'))

    # check_status is stubbed out so only the detail metrics are returned.
    with patch('swiftlm.systems.ntp.run_cmd', fake_run_cmd):
        with patch('swiftlm.systems.ntp.check_status', lambda: []):
            actual = ntp.main()

    self.assertIsInstance(actual, list)
    self.assertEqual(len(actual), 2)
    produced = [item.metric() for item in actual]

    wanted = [
        MetricData.single(ntp.__name__+'.stratum', '1', ''),
        MetricData.single(ntp.__name__+'.offset', '2', ''),
    ]
    for metric in wanted:
        self.assertIn(metric.metric(), produced)
def test_get_controller_info(self):
    """Check the five controller metrics for healthy and failing data.

    Every data set must yield a firmware metric, a battery/capacitor count
    metric and one status metric each for battery/capacitor, controller
    and cache. Only the components listed as failures are expected to
    carry ``Severity.fail``.
    """
    expected_base = MetricData(
        name=hpssacli.__name__ + ".smart_array",
        messages=hpssacli.BASE_RESULT.messages,
        dimensions={
            "serial": "PACCR0M9VZ41S4Q",
            "model": "Smart Array P410",
            "slot": "1",
            "component": "controller",
        },
    )
    battery_count = "battery/capacitor count"
    status_components = ("battery/capacitor", "controller", "cache")

    # Pairs of (data set hpssacli should return, failed components in it).
    scenarios = [
        (SMART_ARRAY_DATA, []),
        (SMART_ARRAY_CACHE_FAIL, ["cache"]),
        (SMART_ARRAY_BATTERY_FAIL, ["battery/capacitor"]),
        (SMART_ARRAY_CONTROLLER_FAIL, ["controller"]),
        (SMART_ARRAY_BATTERY_COUNT_FAIL, ["battery/capacitor count"]),
    ]

    for raw_output, failures in scenarios:
        fake_run_cmd = mock.Mock(return_value=CommandResult(0, raw_output))
        with mock.patch("swiftlm.hp_hardware.hpssacli.run_cmd", fake_run_cmd):
            remaining, actual_slots = hpssacli.get_smart_array_info()

        self.assertIsInstance(remaining, list)
        self.assertEqual(len(remaining), 5)

        firmware = expected_base.child("firmware")
        firmware.value = 6.60
        remaining = self.check_metrics(firmware, remaining)

        count_failed = battery_count in failures
        count_metric = expected_base.child(dimensions={
            "sub_component": battery_count,
            "count": "0" if count_failed else "1",
        })
        if count_failed:
            count_metric.value = Severity.fail
            count_metric.message = "no_battery"
        else:
            count_metric.value = Severity.ok
        remaining = self.check_metrics(count_metric, remaining)

        for component in status_components:
            component_failed = component in failures
            status_metric = expected_base.child(dimensions={
                "sub_component": component,
                "status": "FAIL" if component_failed else "OK",
            })
            if component_failed:
                status_metric.value = Severity.fail
                status_metric.message = "controller_status"
            else:
                status_metric.value = Severity.ok
            remaining = self.check_metrics(status_metric, remaining)

        self.assertFalse(remaining, "Got more metrics than expected")
def test_message(self):
    """Messages follow the value and need their placeholders filled."""
    templates = {
        'ok': 'test message',
        'test': 'test with meta {test_value}',
    }
    metric = MetricData(name='name', messages=templates)

    # Assigning a Severity value picks the matching message automatically.
    self.assertEqual('', metric.message)
    metric.value = Severity.ok
    self.assertEqual('test message', metric.message)

    # Selecting a message whose placeholder has no dimension value yet
    # must raise.
    with self.assertRaisesRegexp(ValueError, 'requires a dimension value'):
        metric.message = 'test'

    # Once the dimension exists the same assignment succeeds.
    metric['test_value'] = '123'
    metric.message = 'test'
    self.assertEqual('test with meta 123', str(metric))
def check_rsync():
    """Report whether rsync is running and bound to the expected ip/port.

    Returns a one-item list with a failing ``swift_services`` metric when
    rsync is not running. Otherwise returns two metrics: an OK
    ``swift_services`` metric followed by a ``check_ip_port`` metric that
    reflects whether the bind address matched.
    """
    service_metric = 'swiftlm.swift.swift_services'
    ip_port_metric = 'swiftlm.swift.swift_services.check_ip_port'

    def rsync_metric(name, severity, message):
        # Every metric gets its own freshly built dimensions dict.
        dimensions = get_base_dimensions()
        dimensions["component"] = "rsync"
        return MetricData.single(name, severity, message=message,
                                 dimensions=dimensions)

    rsync_running, ip_port_match = get_rsync_bind_ip()

    if not rsync_running:
        return [rsync_metric(service_metric, Severity.fail,
                             'rsync is not running')]

    found = [rsync_metric(service_metric, Severity.ok, 'rsync is running')]
    if ip_port_match:
        found.append(rsync_metric(ip_port_metric, Severity.ok, 'OK'))
    else:
        found.append(rsync_metric(
            ip_port_metric, Severity.fail,
            'rsync is not listening on the correct ip or port'))
    return found
def test_metrics_dirs_are_owned(self):
    """Expect OK config and data metrics when the paths exist.

    Two non-empty files and the object-server/node directories are created
    under the test's etc/srv directories, and ``pwd.getpwuid`` is mocked to
    report 'swift' as the owner.
    """
    self._create_etc_file('rsyslog.conf', content='blah')
    self._create_etc_file('rsyncd.conf', content='blah')
    path1 = os.path.join(self.etc_dir, 'swift', 'object-server', '1')
    path2 = os.path.join(self.srv_dir, 'node', '1')
    os.makedirs(path1)
    os.makedirs(path2)

    expected = [
        MetricData.single('swiftlm.swift.file_ownership.config',
                          Severity.ok, message='OK'),
        MetricData.single('swiftlm.swift.file_ownership.data',
                          Severity.ok, message='OK')
    ]

    with mock.patch('swiftlm.swift.file_ownership.server_type',
                    lambda x: x == ServerType.object):
        with mock.patch('pwd.getpwuid') as mock_pwuid:
            mock_pwuid.return_value = mock.Mock(pw_name='swift')
            results = FO.main()

    # Assert on the metrics actually returned. The length of the literal
    # `expected` list is always 2 and proved nothing.
    self.assertEqual(len(results), 2)
    same = metrics_are_same(results, expected)
    self.assertEqual('', same, msg=same)
def test_load_avg(self):
    """get_load_average() reports the second field of the loadavg data."""
    fake_proc_file = Mock(return_value='2.15 1.81 1.69 2/1570 29660\n')

    with patch('swiftlm.systems.system._get_proc_file', fake_proc_file):
        actual = system.get_load_average()

    self.assertIsInstance(actual, list)
    self.assertEqual(len(actual), 1)

    metric = actual[0]
    self.assertIsInstance(metric, MetricData)
    self.assertEqual(metric,
                     MetricData.single('load.host.val.five', value=1.81))
def test_message(self):
    """Check automatic and explicit message selection on a MetricData."""
    metric = MetricData(name='name',
                        messages={'ok': 'test message',
                                  'test': 'test with meta {test_value}'})

    # No message until a value is set; a Severity value then selects the
    # message registered under its name.
    self.assertEqual('', metric.message)
    metric.value = Severity.ok
    self.assertEqual('test message', metric.message)

    # Using a template before all of its dimension values exist is an
    # error.
    with self.assertRaisesRegexp(ValueError, 'requires a dimension value'):
        metric.message = 'test'

    metric['test_value'] = '123'
    metric.message = 'test'
    rendered = str(metric)
    self.assertEqual('test with meta 123', rendered)
def test_status_ok(self):
    """A zero exit code from the status command yields a single OK metric."""
    fake_run_cmd = Mock(return_value=CommandResult(0, ''))

    # check_details is stubbed out so only the status metric is returned.
    with patch('swiftlm.systems.ntp.run_cmd', fake_run_cmd):
        with patch('swiftlm.systems.ntp.check_details', lambda: []):
            actual = ntp.main()

    self.assertIsInstance(actual, list)
    self.assertEqual(len(actual), 1)

    metric = actual[0]
    self.assertIsInstance(metric, MetricData)

    wanted = MetricData.single(ntp.__name__, Severity.ok,
                               ntp.BASE_RESULT.messages['ok'])
    self.assertEqual(metric, wanted)
def test_diskusage(self, *args):
    """diskusage() must return exactly the seven expected metrics."""

    def mount_metric(name, value):
        # Per-mount measurement, tagged with the mount dimension.
        return MetricData.single(name, value,
                                 dimensions={'mount': 'na',
                                             'service': 'object-storage'})

    def host_metric(name, value):
        # Host-wide aggregate, no mount dimension.
        return MetricData.single(name, value,
                                 dimensions={'service': 'object-storage'})

    outstanding = [
        mount_metric('swiftlm.diskusage.host.val.size', 532676608),
        mount_metric('swiftlm.diskusage.host.val.used', 338939904),
        mount_metric('swiftlm.diskusage.host.val.avail', 193736704),
        mount_metric('swiftlm.diskusage.host.val.usage', 64.0),
        host_metric('swiftlm.diskusage.host.max.usage', 64.0),
        host_metric('swiftlm.diskusage.host.min.usage', 64.0),
        host_metric('swiftlm.diskusage.host.avg.usage', 64.0),
    ]

    # Tick off each returned metric; anything unknown fails the test.
    for result in check_mounts.diskusage():
        if result not in outstanding:
            self.assertEqual(True, False, msg='Not expecting %s' %
                             result.__repr__())
        else:
            outstanding.remove(result)

    self.assertEqual(0, len(outstanding), msg='Missing: %s' % outstanding)
def test_get_physical_drive_info(self):
    """Check the single physical-drive metric for OK and failing data.

    Each data set is parsed through a mocked ``run_cmd`` and the first
    returned metric is compared against the expected severity, message and
    ``status`` dimension.
    """
    # List of tuples.
    # t[0] = Data set that hpssacli should return
    # t[1] = Tuple(Severity, Message, Status)
    tests = [
        (PHYSICAL_DRIVE_DATA, (Severity.ok, "OK", "OK")),
        (
            PHYSICAL_DRIVE_STATUS_FAIL,
            (
                Severity.fail,
                hpssacli.BASE_RESULT.messages["physical_drive"],
                "FAIL",
            ),
        ),
    ]

    test_slot = "1"
    for test_data, expected_metrics in tests:
        severity, message, status = expected_metrics

        mock_command = mock.Mock()
        mock_command.return_value = CommandResult(0, test_data)
        with mock.patch("swiftlm.hp_hardware.hpssacli.run_cmd", mock_command):
            actual = hpssacli.get_physical_drive_info(test_slot)

        self.assertIsInstance(actual, list)
        # assertTrue(len(actual), 1) passed 1 as the failure message and
        # only checked for a non-empty list; compare the length instead.
        self.assertEqual(len(actual), 1)
        r = actual[0]

        self.assertIsInstance(r, MetricData)

        expected = MetricData.single(
            hpssacli.__name__ + ".physical_drive",
            severity,
            message,
            {
                "status": status,
                "serial": "YFJMHTZD",
                "box": "1",
                "bay": "1",
                "component": "physical_drive",
                "controller_slot": "1",
            },
        )

        self.assertEqual(r, expected)
def test_get_physical_drive_info(self):
    """Verify the physical-drive metric for healthy and failed drives.

    The parser is run against canned hpssacli output with ``run_cmd``
    patched out. Exactly one metric is expected per data set.
    """
    # List of tuples.
    # t[0] = Data set that hpssacli should return
    # t[1] = Tuple(Severity, Message, Status)
    tests = [
        (PHYSICAL_DRIVE_DATA, (Severity.ok, 'OK', 'OK')),
        (PHYSICAL_DRIVE_STATUS_FAIL, (
            Severity.fail,
            hpssacli.BASE_RESULT.messages['physical_drive'],
            'FAIL'))
    ]

    test_slot = "1"
    for test_data, expected_metrics in tests:
        mock_command = mock.Mock()
        mock_command.return_value = CommandResult(0, test_data)
        with mock.patch('swiftlm.hp_hardware.hpssacli.run_cmd',
                        mock_command):
            actual = hpssacli.get_physical_drive_info(test_slot)

        self.assertIsInstance(actual, list)
        # The second argument of assertTrue is the failure message, so the
        # old assertTrue(len(actual), 1) never compared the length.
        self.assertEqual(len(actual), 1)
        r = actual[0]

        self.assertIsInstance(r, MetricData)

        expected = MetricData.single(
            hpssacli.__name__ + '.physical_drive',
            expected_metrics[0],  # Severity
            expected_metrics[1],  # Message
            {'status': expected_metrics[2],  # Status
             'serial': 'YFJMHTZD',
             'box': '1',
             'bay': '1',
             'component': 'physical_drive',
             'controller_slot': '1'})

        self.assertEqual(r, expected)
def test_details_fail(self):
    """Output lacking "offset" yields a failure metric beside the stratum."""
    fake_run_cmd = Mock(return_value=CommandResult(0, 'stratum=1,'))

    # check_status is stubbed out so only the detail metrics are returned.
    with patch('swiftlm.systems.ntp.run_cmd', fake_run_cmd):
        with patch('swiftlm.systems.ntp.check_status', lambda: []):
            actual = ntp.main()

    self.assertIsInstance(actual, list)
    self.assertEqual(len(actual), 2)
    produced = [item.metric() for item in actual]

    offset_failure = CheckFailure.child()
    offset_failure.value = Severity.fail
    offset_failure['check'] = ntp.__name__ + '.offset'
    offset_failure['error'] = 'Output does not contain "offset"'

    wanted = [
        offset_failure,
        MetricData.single(ntp.__name__+'.stratum', '1', ''),
    ]
    for metric in wanted:
        self.assertIn(metric.metric(), produced)
try:
    import commands
except ImportError:
    import subprocess as commands
import string

from swiftlm.utils.utility import get_swift_bind_ips, UtilityExeception
from swiftlm.utils.utility import ip_to_interface
from swiftlm.utils.metricdata import MetricData
from swiftlm.utils.values import Severity

BASE_RESULT = MetricData(
    name=__name__,
    messages={
        'fail': 'Could not discover a valid interface name'
    }
)


def str_to_num(val):
    """Convert ``val`` to a number when it looks like one.

    ``int`` is tried first, then ``float``. If neither conversion accepts
    the value, ``val`` is returned unchanged.

    A ``TypeError`` raised by ``int`` (for example for ``None``) is not
    handled here and propagates to the caller, as before.
    """
    try:
        return int(val)
    except ValueError:
        pass

    try:
        return float(val)
    except (TypeError, ValueError):
        # Only conversion failures mean "not a number". A bare except here
        # would also swallow KeyboardInterrupt and SystemExit.
        return val
def test_equality_behaviour(self):
    """Equality depends on name, dimensions, message and value only."""
    # Identical construction compares equal.
    left = MetricData('name', self.messages, self.dimensions)
    right = MetricData('name', self.messages, self.dimensions)
    self.assertEqual(left, right)

    # A different name breaks equality.
    left = MetricData('name', self.messages, self.dimensions)
    right = MetricData('not-name', self.messages, self.dimensions)
    self.assertNotEqual(left, right)

    # The message templates themselves are not part of the comparison.
    left = MetricData('name', {'a': 1}, self.dimensions)
    right = MetricData('name', {'b': 2}, self.dimensions)
    self.assertEqual(
        left, right,
        'Message dictionaries should not '
        'affect equality of MetricData instances')

    # Different dimensions break equality.
    left = MetricData('name', self.messages, self.dimensions)
    right = MetricData('name', self.messages, {})
    self.assertNotEqual(left, right)

    # Different selected messages break equality.
    left = MetricData('name', self.messages, self.dimensions)
    right = MetricData('name', self.messages, self.dimensions)
    left.message = 'ok'
    right.message = 'fail'
    self.assertNotEqual(left, right)

    # Different values break equality.
    left = MetricData('name', self.messages, self.dimensions)
    right = MetricData('name', self.messages, self.dimensions)
    left.value = 1
    right.value = 2
    self.assertNotEqual(left, right)
def main():
    """Run the selected checks and write the formatted metrics.

    Check results are flattened into ``metric()`` values. A check raising
    ``SwiftlmCheckFailure`` is reported with the exception text; any other
    exception is reported with its flattened traceback. At most one
    ``swiftlm.check.failure`` measurement is kept per run, and an OK one is
    added when no check failed.

    The formatted output goes to ``args.filename`` (under a file lock)
    when a filename was given, otherwise to stdout. A failure to write the
    file prints an error and exits with status 1.
    """
    args = parse_args()
    metrics = []
    scan_dimensions = {
        'component': 'swiftlm-scan',
        'service': 'object-storage'
    }

    def record_failure(func, error_text):
        # Append a failing check.failure metric for the given check.
        failure = MetricData.single('check.failure',
                                    Severity.fail,
                                    '{error} | Failed with: {check}',
                                    dimensions=dict(scan_dimensions),
                                    msgkeys={
                                        'check': func.__module__,
                                        'error': error_text
                                    })
        metrics.append(failure.metric())

    for func in args.selected:
        try:
            outcome = func()
            if (isinstance(outcome, list) and outcome
                    and isinstance(outcome[0], MetricData)):
                metrics.extend([item.metric() for item in outcome])
            elif isinstance(outcome, MetricData):
                metrics.append(outcome.metric())
        except SwiftlmCheckFailure as err:
            record_failure(func, str(err))
        except:  # noqa
            trace = ' '.join(traceback.format_exception(*sys.exc_info()))
            record_failure(func, trace.replace('\n', ' '))

    # There is no point in reporting multiple measurements of
    # swiftlm.check.failure metric in same cycle.
    check_failures_found = [metric for metric in metrics
                            if metric.get('metric') == 'swiftlm.check.failure']
    if check_failures_found:
        # Remove all except one instance
        for surplus in check_failures_found[:-1]:
            metrics.remove(surplus)
    else:
        all_clear = MetricData.single('check.failure',
                                      Severity.ok,
                                      'ok',
                                      dimensions=dict(scan_dimensions))
        metrics.append(all_clear.metric())

    dumped_metrics = FORMATS[args.format](metrics, args.pretty)

    if not args.filename:
        sys.stdout.write(dumped_metrics)
        return

    try:
        with lock_file(args.filename, 2, unlink=False) as cf:
            cf.truncate()
            cf.write(dumped_metrics)
    except (Exception, Timeout) as err:
        print('ERROR: %s' % err)
        sys.exit(1)
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. # from swiftlm.utils.utility import server_type, get_all_proc_and_cmdlines,\ get_network_interface_conf,\ get_rsync_target_conf from swiftlm.utils.metricdata import MetricData, get_base_dimensions from swiftlm.utils.values import Severity BASE_RESULT = MetricData(name=__name__, messages={ 'fail': '{component} is not running', 'ok': '{component} is running', 'unknown': 'no swift services running', }) SERVICES = [ "account-auditor", "account-reaper", "account-replicator", "account-server", "container-replicator", "container-server", "container-updater", "container-auditor", "container-reconciler", "container-sync", "object-replicator", "object-server", "object-updater", "object-auditor", "object-reconstructor", "proxy-server" ] def services_to_check(): # Filter SERVICES down to what should be running on the node. # server_type returns a dict of {'object': bool, etc}
import ast import subprocess import os import ConfigParser from swiftlm.utils.metricdata import MetricData from swiftlm.utils.values import Severity ERRORS_PATTERN = 'drive-audit: Errors found:' DEVICES_PATTERN = 'drive-audit: Devices found:' DRIVE_AUDIT_CONF = '/etc/swift/drive-audit.conf' BASE_RESULT = MetricData( name=__name__, messages={ 'ok': 'No errors found on device mounted at: {mount_point}', 'warn': 'No devices found', 'fail': 'Errors found on device mounted at: {mount_point}', 'unknown': 'Unrecoverable error: {error}' }) def get_devices(output): """ Returns a list of devices as a dict of mount_point and device """ # TODO use drive_model.yml to determine drives to check lines = [s.strip() for s in output.split('\n') if s] for line in lines: if DEVICES_PATTERN in line: devs = line.split(DEVICES_PATTERN)[1].strip() devices = ast.literal_eval(devs)
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#


import re

from swiftlm.utils.metricdata import MetricData, CheckFailure
from swiftlm.utils.values import Severity
from swiftlm.utils.utility import run_cmd

BASE_RESULT = MetricData(
    name=__name__,
    messages={
        'ok': 'OK',
        'fail': 'ntpd not running: {error}',
    }
)


def check_status():
    """Report whether ``systemctl status ntp`` exits successfully.

    Returns a one-item list holding a metric that is ``Severity.ok`` for a
    zero exit code. Otherwise the metric is ``Severity.fail`` and carries
    the command output in its ``error`` dimension.
    """
    status = run_cmd('systemctl status ntp')
    metric = BASE_RESULT.child()

    if status.exitcode == 0:
        metric.value = Severity.ok
    else:
        # The error dimension must be set before the value, since the fail
        # message template refers to it.
        metric['error'] = status.output
        metric.value = Severity.fail

    return [metric]
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. # import re from swiftlm.utils.metricdata import MetricData, CheckFailure from swiftlm.utils.values import Severity from swiftlm.utils.utility import run_cmd BASE_RESULT = MetricData( name='load.host', messages={} ) def _get_proc_file(path): return open(path, mode='r').read() def get_load_average(): r = BASE_RESULT.child(name='val.five') load_avg_data = _get_proc_file('/proc/loadavg') r.value = float(load_avg_data.split()[1]) return [r] def main():
import pwd from swiftlm.utils.utility import server_type from swiftlm.utils.metricdata import MetricData from swiftlm.utils.values import Severity, ServerType SWIFT_DIR = '/etc/swift' CONF_DIR = '/etc' NODE_DIR = '/srv/node' ZERO_BYTE_EXCLUDE = frozenset(['reload-trigger', 'swauth_to_tenant_map.gz']) SWIFT_OWNED_EXCLUDE = frozenset(['lost+found']) BASE_RESULT = MetricData(name=__name__, messages={ 'empty': 'Path: {path} should not be empty', 'ownership': 'Path: {path} is not owned by swift', 'missing': 'Path: {path} is missing', }) def add_result(results, path, reason): c = BASE_RESULT.child(dimensions={'path': path}) c.value = Severity.fail c.message = reason results.add(c) def _is_swift_owned(results, p): # True = good, False = bad owner = pwd.getpwuid(os.stat(p).st_uid).pw_name if owner == 'swift':
def get_smart_array_info():
    """
    Collect controller metrics from ``hpssacli ctrl all show detail``.

    The output is split into one dict per controller. Keys are lowercased
    versions of the key name on each line, including special characters.
    Values are not changed. The key 'model' is parsed from the heading
    line; every other key (including 'slot') comes from a "Key: Value"
    line.

    Always returns a pair ``(results, controller_slots)``:

    * ``results`` is the list of metrics returned by ``check_controller``
      for each controller. It is a single ``check.failure`` metric when
      the command failed or produced no output, and empty when no
      controllers were detected.
    * ``controller_slots`` is the list of 'slot' values of the parsed
      controllers. It is empty on every early-exit path.

    Example output:

    Smart Array P410 in Slot 1
       Bus Interface: PCI
       Slot: 1
       Serial Number: PACCR0M9VZ41S4Q
       Cache Serial Number: PACCQID12061TTQ
       RAID 6 (ADG) Status: Disabled
       Controller Status: OK
       Hardware Revision: C
       Firmware Version: 6.60
       Rebuild Priority: Medium
       Expand Priority: Medium
       Surface Scan Delay: 15 secs
       Surface Scan Mode: Idle
       Queue Depth: Automatic
       Monitor and Performance Delay: 60 min
       Elevator Sort: Enabled
       Degraded Performance Optimization: Disabled
       Inconsistency Repair Policy: Disabled
       Wait for Cache Room: Disabled
       Surface Analysis Inconsistency Notification: Disabled
       Post Prompt Timeout: 15 secs
       Cache Board Present: True
       Cache Status: OK
       Cache Ratio: 25% Read / 75% Write
       Drive Write Cache: Disabled
       Total Cache Size: 256 MB
       Total Cache Memory Available: 144 MB
       No-Battery Write Cache: Disabled
       Cache Backup Power Source: Batteries
       Battery/Capacitor Count: 1
       Battery/Capacitor Status: OK
       SATA NCQ Supported: True
       Number of Ports: 2 Internal only
       Encryption Supported: False
       Driver Name: hpsa
       Driver Version: 3.4.0
       Driver Supports HP SSD Smart Path: False

    Smart Array P440ar in Slot 0 (Embedded) (HBA Mode)
       Bus Interface: PCI
       Slot: 0
       Serial Number: PDNLH0BRH7V7GC
       Cache Serial Number: PDNLH0BRH7V7GC
       Controller Status: OK
       Hardware Revision: B
       Firmware Version: 2.14
       Controller Temperature (C): 50
       Number of Ports: 2 Internal only
       Driver Name: hpsa
       Driver Version: 3.4.4
       HBA Mode Enabled: True
       PCI Address (Domain:Bus:Device.Function): 0000:03:00.0
       Negotiated PCIe Data Rate: PCIe 3.0 x8 (7880 MB/s)
       Controller Mode: HBA
       Controller Mode Reboot: Not Required
       Current Power Mode: MaxPerformance
       Host Serial Number: MXQ51906YF
    """
    results = []
    controller_slots = []
    controller_result = BASE_RESULT.child()
    controller_result.name += '.' + 'smart_array'

    def scan_failure(error):
        # Build the metric reporting that the controller scan failed.
        return MetricData.single('check.failure',
                                 Severity.fail,
                                 '{check} failed with: {error}',
                                 {'check': controller_result.name,
                                  'error': error,
                                  'component': 'swiftlm-scan'})

    rc = run_cmd(LOCK_FILE_COMMAND + 'hpssacli ctrl all show detail')

    # Every early exit returns the same (results, slots) pair as the
    # success path. Previously these paths returned a bare list, which
    # broke callers that unpack two values.
    if rc.exitcode != 0:
        if 'Error: No controllers detected.' in str(rc.output):
            return results, controller_slots
        return [scan_failure(str(rc.output))], controller_slots

    if not rc.output:
        return ([scan_failure('No usable output from hpssacli')],
                controller_slots)

    controllers = []
    info = {}
    for line in rc.output.split('\n'):
        # Ignore blank lines
        if not line.strip():
            continue

        if is_cont_heading(line):
            # A heading starts a new controller; store the previous one.
            if info:
                controllers.append(info)

            # To get controller model, assume that the line is in the form:
            # <model> in Slot <slot>
            model = line.strip().split("in Slot")[0].strip()
            info = {'model': model}
            continue

        k, v = line.split(':', 1)
        info[k.strip().lower()] = v.strip()

    # Have to add the last controller.
    if info:
        controllers.append(info)

    for c in controllers:
        results.extend(check_controller(c, controller_result))
        if c.get('slot'):
            controller_slots.append(c.get('slot'))

    return results, controller_slots
def get_logical_drive_info(slot, cache_check=True):
    """
    Run ``hpssacli ctrl slot=<slot> ld all show detail`` and check each
    logical drive found.

    Sample of the output being parsed:

    array L

       Logical Drive: 12
          Size: 1.8 TB
          Fault Tolerance: 0
          Heads: 255
          Sectors Per Track: 32
          Cylinders: 65535
          Strip Size: 256 KB
          Full Stripe Size: 256 KB
          Status: OK
          Caching: Enabled
          Unique Identifier: 600508B1001CEA938043498011A76404
          Disk Name: /dev/sdl Mount Points: /srv/node/disk11 1.8 TB Partition Number 2
          OS Status: LOCKED
          Logical Drive Label: AF3C73D8PACCR0M9VZ41S4QEB69
          Drive Type: Data
          LD Acceleration Method: Controller Cache

    BUG: It appears that the current build of hpssacli has a bug and
    outputs Disk Name and Mount Points on the same line.  The workaround
    is to treat any line containing exactly two colons as that fused
    line, which could fail if the output format changes.

    :param slot: controller slot, interpolated into the hpssacli command
                 and reported in failure metrics.
    :param cache_check: passed through unchanged to check_logical_drive.
    :returns: a list.  On a non-zero exit code or when the output has no
              non-blank lines it holds a single 'check.failure' metric;
              otherwise it holds whatever check_logical_drive returned
              for each drive, in output order.

    A line that has no colon and does not start with 'array ' raises
    ValueError.
    """
    drive_result = BASE_RESULT.child()
    drive_result.name += '.logical_drive'

    def failure(error):
        # Both failure paths report the same metric; only the text differs.
        return MetricData.single('check.failure',
                                 Severity.fail,
                                 '{check} slot: {slot} failed with: {error}',
                                 {'check': drive_result.name,
                                  'slot': slot,
                                  'error': error,
                                  'component': 'swiftlm-scan'})

    rc = run_cmd(
        LOCK_FILE_COMMAND + 'hpssacli ctrl slot=%s ld all show detail' % slot)

    if rc.exitcode != 0:
        return [failure(str(rc.output))]

    # Strip every line and discard the ones that end up empty.
    stripped = (raw.strip() for raw in rc.output.split('\n'))
    lines = [text for text in stripped if text]
    if not lines:
        return [failure('No usable output from hpssacli')]

    # The first line should be the controller model and slot number.
    # The caller already has that, so drop it when present.
    if is_cont_heading(lines[0]):
        lines = lines[1:]

    drives = []
    drive_info = {}
    for line in lines:
        if line.count(':') == 2:
            # Two colons: assume the bugged hpssacli line that fuses
            # "Disk Name" and "Mount Points".  The disk name is the first
            # word after the first colon; the mount points are everything
            # after the second colon.
            _, disk_name, mount_points = line.split(':')
            drive_info['disk name'] = disk_name.split()[0]
            drive_info['mount points'] = mount_points.strip()
        elif line.startswith('array '):
            # The "array X" line does not follow the colon-separated
            # format, and it is the only delimiter between drives: close
            # the drive collected so far and start a new one.
            if drive_info:
                drives.append(drive_info)
            drive_info = {'array': line.split()[1]}
        else:
            key, value = line.split(':', 1)
            drive_info[key.strip().lower()] = value.strip()

    # The final drive has no following "array" line to close it.
    if drive_info:
        drives.append(drive_info)

    results = []
    for drive in drives:
        results.extend(check_logical_drive(drive, drive_result, cache_check))
    return results
except ImportError: import ConfigParser as configparser from collections import OrderedDict from swiftlm.utils.metricdata import MetricData from swiftlm.utils.values import Severity from swiftlm.utils.utility import run_cmd from swiftlm import CONFIG_FILE LOCK_FILE_COMMAND = '/usr/bin/flock -w 10 /var/lock/hpssacli-swiftlm.lock ' BASE_RESULT = MetricData( name=__name__, messages={ 'no_battery': 'No cache battery', 'unknown': 'hpssacli command failed', 'controller_status': '{sub_component} status is {status}', 'in_hba_mode': 'Controller is in HBA mode; performance will be poor', 'physical_drive': 'Drive {serial_number}: ' '{box}:{bay} has status: {status}', 'l_drive': 'Logical Drive {logical_drive} has status: {status}', 'l_cache': 'Logical Drive {logical_drive} has cache status: {caching}', 'ok': 'OK', 'fail': 'FAIL', }) # This is all the data we are looking for in the hpssacli output so we # will _only_ gather whatever values are in this list METRIC_KEYS = [ 'array', 'physicaldrive', 'logical_drive', 'caching', 'serial_number', 'slot', 'firmware_version', 'controller_mode', 'battery_capacitor_presence', 'battery_capacitor_status', 'controller_status', 'cache_status', 'box', 'bay', 'status', 'ld acceleration method'
def is_valid_xfs(d, r): rc = run_cmd('xfs_info %s' % d.mount) if rc.exitcode == 0: return True else: return False BASE_RESULT = MetricData( name=__name__, messages={ is_mounted.__name__: '{device} not mounted at {mount}', is_mounted_775.__name__: ('{device} mounted at {mount} has permissions' ' {permissions} not 755'), is_ug_swift.__name__: ('{device} mounted at {mount} is not owned by' ' swift, has user: {user}, group: {group}'), is_valid_label.__name__: ('{device} mounted at {mount} has invalid ' 'label {label}'), is_xfs.__name__: '{device} mounted at {mount} is not XFS', is_valid_xfs.__name__: '{device} mounted at {mount} is corrupt', 'ok': '{device} mounted at {mount} ok', 'no_devices': 'No devices found' } ) def check_mounts(): results = [] checks = ( is_mounted, is_mounted_775, is_ug_swift,
def do_it(func, metric_name, slot_used):
    """
    Exercise the two generic failure paths of an hpssacli wrapper.

    This helper has no ``self`` parameter; it uses the ``self`` of the
    enclosing test method for its assertions.

    :param func: the hpssacli function under test.
    :param metric_name: suffix appended to the hpssacli module name to
                        form the expected 'check' dimension.
    :param slot_used: controller slot passed to ``func``, or the string
                      "N/A" when ``func`` takes no arguments.

    Two scenarios are run, in this order, with run_cmd patched:

    1. the command exits non-zero with output "error" (could be anything
       from hpssacli missing to insufficient privileges);
    2. the command exits zero but provides no output.

    In both cases ``func`` must return a list holding exactly one
    'check.failure' metric.
    """
    check_name = hpssacli.__name__ + "." + metric_name

    def call_with(exitcode, output):
        # Call func while run_cmd returns the canned command result.
        mock_command = mock.Mock()
        mock_command.return_value = CommandResult(exitcode, output)
        with mock.patch("swiftlm.hp_hardware.hpssacli.run_cmd",
                        mock_command):
            if slot_used == "N/A":
                return func()
            return func(slot_used)

    def expected_failure(error):
        # Build the metric func is expected to report for this error.
        if slot_used == "N/A":
            return MetricData.single(
                "check.failure",
                Severity.fail,
                "{check} failed with: {error}",
                {
                    "check": check_name,
                    "error": error,
                    "component": "swiftlm-scan",
                },
            )
        return MetricData.single(
            "check.failure",
            Severity.fail,
            "{check} slot: {slot} failed with: {error}",
            {
                "check": check_name,
                "error": error,
                "slot": slot_used,
                "component": "swiftlm-scan",
            },
        )

    scenarios = (
        (1, "error", "error"),
        (0, "", "No usable output from hpssacli"),
    )
    for exitcode, output, error in scenarios:
        actual = call_with(exitcode, output)
        self.assertIsInstance(actual, list)
        # assertTrue(len(actual), 1) would treat 1 as the failure message
        # and pass for any non-empty list; compare the length explicitly.
        self.assertEqual(len(actual), 1)
        self.assertEqual(actual[0], expected_failure(error))
from swiftlm.utils.utility import (get_ring_hosts, server_type, UtilityExeception) from swiftlm.utils.metricdata import MetricData, get_base_dimensions from swiftlm.utils.values import Severity, ServerType from swiftlm.utils.utility import run_cmd # Connectivity needs to report out target hostname and observer hostname # rather than the normal hostname dimension _base_dimensions = dict(get_base_dimensions()) _base_dimensions['observer_host'] = _base_dimensions['hostname'] del _base_dimensions['hostname'] BASE_RESULT = MetricData(name=__name__, messages={ 'ok': '{hostname}:{target_port} ok', 'warn': 'No hosts to check', 'fail': '{hostname}:{target_port} {fail_message}', 'unknown': 'Unrecoverable error: {error}', }, dimensions=_base_dimensions) MAX_THREAD_LIMIT = 10 SWIFT_PROXY_PATH = '/opt/stack/service/swift-proxy-server/etc' class HostPort(namedtuple('HostPort', ['host', 'port'])): @classmethod def from_string(cls, s): """ Create a HostPort instance from a string """ # Supports: # http://host.name, http://host.name:port # host.name, host.name:port
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. # import json from swiftlm.utils.metricdata import MetricData, timestamp, CheckFailure from swiftlm.utils.values import Severity, ServerType RECON_PATH = '/var/cache/swift/' TIMEOUT = 2 BASE_RESULT = MetricData( name=__name__, messages={} ) def _recon_check(st): """ Parses the blah.recon file and returns the last replication. :param st: ServerType, Used to determine the metric names and recon file name. :param replication_field_name: string, name of the field in the json file that hold the last replication data. """ results = [] if not st.is_instance: return results
def test_equality_behaviour(self):
    """
    MetricData instances compare equal when name, dimensions, message and
    value match; the messages dictionary takes no part in the comparison.
    """
    def build(name='name', messages=None, dimensions=None):
        # Anything not overridden falls back to the fixture's values.
        return MetricData(
            name,
            self.messages if messages is None else messages,
            self.dimensions if dimensions is None else dimensions)

    # Identically constructed instances are equal.
    self.assertEqual(build(), build())

    # A different name breaks equality.
    self.assertNotEqual(build(), build(name='not-name'))

    # Different message dictionaries do not.
    self.assertEqual(build(messages={'a': 1}), build(messages={'b': 2}),
                     'Message dictionaries should not '
                     'affect equality of MetricData instances')

    # Different dimensions break equality.
    self.assertNotEqual(build(), build(dimensions={}))

    # A different selected message breaks equality.
    first, second = build(), build()
    first.message = 'ok'
    second.message = 'fail'
    self.assertNotEqual(first, second)

    # A different value breaks equality.
    first, second = build(), build()
    first.value = 1
    second.value = 2
    self.assertNotEqual(first, second)
def get_physical_drive_info(slot):
    """
    Run ``hpssacli ctrl slot=<slot> pd all show detail`` and check each
    physical drive found.  There are multiple drives in the output, each
    in the form:

    array A

       physicaldrive 2C:1:1
          Port: 2C
          Box: 1
          Bay: 1
          Status: OK
          Drive Type: Data Drive
          Interface Type: SAS
          Size: 2 TB
          Native Block Size: 512
          Rotational Speed: 7200
          Firmware Revision: HPD3
          Serial Number: YFJMHTZD
          Model: HP MB2000FBUCL
          Current Temperature (C): 27
          Maximum Temperature (C): 38
          PHY Count: 2
          PHY Transfer Rate: 6.0Gbps, Unknown

    :param slot: controller slot, interpolated into the hpssacli command,
                 reported in failure metrics and, as a string, set as the
                 'controller_slot' dimension of the base result.
    :returns: a list.  On a non-zero exit code or when the output has no
              non-blank lines it holds a single 'check.failure' metric;
              otherwise it holds whatever check_physical_drive returned
              for each drive, in output order.

    A line that has no colon and does not start with 'array ' raises
    ValueError.
    """
    drive_result = BASE_RESULT.child(dimensions={
        'controller_slot': str(slot),
    })
    drive_result.name += '.physical_drive'

    def failure(error):
        # Both failure paths report the same metric; only the text differs.
        return MetricData.single('check.failure',
                                 Severity.fail,
                                 '{check} slot: {slot} failed with: {error}',
                                 {'check': drive_result.name,
                                  'slot': slot,
                                  'error': error,
                                  'component': 'swiftlm-scan'})

    rc = run_cmd(
        LOCK_FILE_COMMAND + 'hpssacli ctrl slot=%s pd all show detail' % slot)

    if rc.exitcode != 0:
        return [failure(str(rc.output))]

    # Strip every line and discard the ones that end up empty.
    stripped = (raw.strip() for raw in rc.output.split('\n'))
    lines = [text for text in stripped if text]
    if not lines:
        return [failure('No usable output from hpssacli')]

    # Drop the controller heading when it leads the output.
    if is_cont_heading(lines[0]):
        lines = lines[1:]

    drives = []
    drive_info = {}
    for line in lines:
        if line.count(':') > 1:
            # The physicaldrive line contains two colons and duplicates
            # information found on later lines, so it is dropped.  Any
            # other line with more than one colon is dropped as well.
            continue

        if line.startswith('array '):
            # The "array X" line does not follow the colon-separated
            # format, and it is the only delimiter between drives: close
            # the drive collected so far and start a new one.
            if drive_info:
                drives.append(drive_info)
            drive_info = {'array': line.split()[1]}
            continue

        key, value = line.split(':', 1)
        drive_info[key.strip().lower()] = value.strip()

    # The final drive has no following "array" line to close it.
    if drive_info:
        drives.append(drive_info)

    results = []
    for drive in drives:
        results.extend(check_physical_drive(drive, drive_result))
    return results