def _get_object_info(self, account, container, obj, number):
    obj_conf = self.configs['object-server']
    config_path = obj_conf[number]
    options = utils.readconf(config_path, 'app:object-server')
    swift_dir = options.get('swift_dir', '/etc/swift')
    ring = POLICIES.get_object_ring(int(self.policy), swift_dir)
    part, nodes = ring.get_nodes(account, container, obj)
    for node in nodes:
        # assumes one to one mapping
        if node['port'] == int(options.get('bind_port')):
            device = node['device']
            break
    else:
        return None
    mgr = DiskFileManager(options, get_logger(options))
    disk_file = mgr.get_diskfile(device, part, account, container, obj,
                                 self.policy)
    info = disk_file.read_metadata()
    return info
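
# `works_only_once` is referenced by test_run_parallel_audit below but is not
# defined in this excerpt; here is a minimal sketch of the one-shot wrapper
# that test needs, assuming it is not already provided elsewhere in the
# module: the wrapped audit_loop runs normally on the first call, and every
# later call raises the injected `failure`.
def works_only_once(audit_func, failure):
    called = [False]

    def one_shot(*args, **kwargs):
        if called[0]:
            # second and subsequent calls blow up with the injected error
            raise failure
        else:
            called[0] = True
            return audit_func(*args, **kwargs)

    return one_shot
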
class TestAuditor(unittest.TestCase):

    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.rcache = os.path.join(self.testdir, 'object.recon')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        os.mkdir(os.path.join(self.devices, 'sdb'))

        # policy 0
        self.objects = os.path.join(self.devices, 'sda',
                                    get_data_dir(POLICIES[0]))
        self.objects_2 = os.path.join(self.devices, 'sdb',
                                      get_data_dir(POLICIES[0]))
        os.mkdir(self.objects)
        # policy 1
        self.objects_p1 = os.path.join(self.devices, 'sda',
                                       get_data_dir(POLICIES[1]))
        self.objects_2_p1 = os.path.join(self.devices, 'sdb',
                                         get_data_dir(POLICIES[1]))
        os.mkdir(self.objects_p1)
        # policy 2
        self.objects_p2 = os.path.join(self.devices, 'sda',
                                       get_data_dir(POLICIES[2]))
        self.objects_2_p2 = os.path.join(self.devices, 'sdb',
                                         get_data_dir(POLICIES[2]))
        os.mkdir(self.objects_p2)

        self.parts = {}
        self.parts_p1 = {}
        self.parts_p2 = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            self.parts_p1[part] = os.path.join(self.objects_p1, part)
            self.parts_p2[part] = os.path.join(self.objects_p2, part)
            os.mkdir(os.path.join(self.objects, part))
            os.mkdir(os.path.join(self.objects_p1, part))
            os.mkdir(os.path.join(self.objects_p2, part))

        self.conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.ec_df_mgr = ECDiskFileManager(self.conf, self.logger)

        # diskfiles for policy 0, 1, 2
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
                                                  policy=POLICIES[0])
        self.disk_file_p1 = self.df_mgr.get_diskfile('sda', '0', 'a', 'c',
                                                     'o', policy=POLICIES[1])
        self.disk_file_ec = self.ec_df_mgr.get_diskfile(
            'sda', '0', 'a', 'c', 'o', policy=POLICIES[2], frag_index=1)

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_worker_conf_parms(self):
        def check_common_defaults():
            self.assertEqual(auditor_worker.max_bytes_per_second, 10000000)
            self.assertEqual(auditor_worker.log_time, 3600)

        # test default values
        conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        check_common_defaults()
        for policy in POLICIES:
            mgr = auditor_worker.diskfile_router[policy]
            self.assertEqual(mgr.disk_chunk_size, 65536)
        self.assertEqual(auditor_worker.max_files_per_second, 20)
        self.assertEqual(auditor_worker.zero_byte_only_at_fps, 0)

        # test specified audit value overrides
        conf.update({'disk_chunk_size': 4096})
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        check_common_defaults()
        for policy in POLICIES:
            mgr = auditor_worker.diskfile_router[policy]
            self.assertEqual(mgr.disk_chunk_size, 4096)
        self.assertEqual(auditor_worker.max_files_per_second, 50)
        self.assertEqual(auditor_worker.zero_byte_only_at_fps, 50)

    def test_object_audit_extra_data(self):
        def run_tests(disk_file):
            auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                                   self.rcache, self.devices)
            data = '0' * 1024
            etag = md5()
            with disk_file.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                timestamp = str(normalize_timestamp(time.time()))
                metadata = {
                    'ETag': etag,
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))
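                # commit() finalizes the PUT; for the EC diskfile this is the
                # step that marks the written .data fragment durable (for
                # replication policies it is effectively a no-op), so the
                # audits below see a complete on-disk object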
                pre_quarantines = auditor_worker.quarantines

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0',
                                  policy=disk_file.policy))
                self.assertEqual(auditor_worker.quarantines, pre_quarantines)

                os.write(writer._fd, 'extra_data')

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0',
                                  policy=disk_file.policy))
                self.assertEqual(auditor_worker.quarantines,
                                 pre_quarantines + 1)
        run_tests(self.disk_file)
        run_tests(self.disk_file_p1)
        run_tests(self.disk_file_ec)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = '0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
        pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
                                                  policy=POLICIES.legacy)

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update('1' + '0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'w')
        fp.write('0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch.object(DiskFileManager,
                               'get_diskfile_from_audit_location', blowup):
            self.assertRaises(NameError, auditor_worker.object_audit,
                              AuditLocation(os.path.dirname(path), 'sda', '0',
                                            policy=POLICIES.legacy))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path), 'sda', '0',
                              policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.errors, 1)
    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024

        def write_file(df):
            with df.create() as writer:
                writer.write(data)
                metadata = {
                    'ETag': md5(data).hexdigest(),
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))

        # policy 0
        write_file(self.disk_file)
        # policy 1
        write_file(self.disk_file_p1)
        # policy 2
        write_file(self.disk_file_ec)

        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        # 1 object per policy falls into the 1024 bucket
        self.assertEqual(auditor_worker.stats_buckets[1024], 3)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)

        # pick up some additional code coverage, large file
        data = '0' * 1024 * 1024
        for df in (self.disk_file, self.disk_file_ec):
            with df.create() as writer:
                writer.write(data)
                metadata = {
                    'ETag': md5(data).hexdigest(),
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))

        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        # still have the 1024 byte object left in policy-1 (plus the
        # stats from the original 3)
        self.assertEqual(auditor_worker.stats_buckets[1024], 4)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)
        # and then policy-0 disk_file was re-written as a larger object
        self.assertEqual(auditor_worker.stats_buckets['OVER'], 2)

        # pick up even more additional code coverage, misc paths
        auditor_worker.log_time = -1
        auditor_worker.stats_sizes = []
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        self.assertEqual(auditor_worker.stats_buckets[1024], 4)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)
        self.assertEqual(auditor_worker.stats_buckets['OVER'], 2)

    def test_object_run_logging(self):
        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertIn('ALL - parallel, sda', log_lines[0])

        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertIn('ZBF - sda', log_lines[0])
    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
            writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob',
                                                  policy=POLICIES.legacy)
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del kwargs['zero_byte_fps']
        clear_auditor_status(self.devices)
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, timestamp=None):
        if timestamp is None:
            timestamp = Timestamp(time.time())
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp.internal,
                'Content-Length': 10,
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
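        # the bad zero-byte object written by setup_bad_zero_byte() fails
        # its audit, so a full (non-ZBF) run is expected to move it under
        # <devices>/sda/quarantined/objects, checked below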
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50

        called_args = [0]

        def mock_get_auditor_status(path, logger, audit_type):
            called_args[0] = audit_type
            return get_auditor_status(path, logger, audit_type)

        with mock.patch('swift.obj.diskfile.get_auditor_status',
                        mock_get_auditor_status):
            self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))
        self.assertEqual('ZBF', called_args[0])

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):

            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        FakeFile):
            kwargs = {'mode': 'once'}
            kwargs['zero_byte_fps'] = 50
            self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))
        self.assertTrue(rat[0])

    @mock.patch.object(auditor.ObjectAuditor, 'run_audit')
    @mock.patch('os.fork', return_value=0)
    def test_with_inaccessible_object_location(self, mock_os_fork,
                                               mock_run_audit):
        # Need to ensure that any failures in run_audit do
        # not prevent sys.exit() from running.  Otherwise we get
        # zombie processes.
        e = OSError('permission denied')
        mock_run_audit.side_effect = e
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.assertRaises(SystemExit, self.auditor.fork_child, self)

    def test_with_only_tombstone(self):
        # sanity check that auditor doesn't touch solitary tombstones
        ts_iter = make_timestamp_iter()
        self.setup_bad_zero_byte(timestamp=next(ts_iter))
        self.disk_file.delete(next(ts_iter))
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(1, len(files))
        self.assertTrue(files[0].endswith('ts'))
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        files_after = os.listdir(self.disk_file._datadir)
        self.assertEqual(files, files_after)

    def test_with_tombstone_and_data(self):
        # rsync replication could leave a tombstone and data file in object
        # dir - verify they are both removed during audit
        ts_iter = make_timestamp_iter()
        ts_tomb = next(ts_iter)
        ts_data = next(ts_iter)
        self.setup_bad_zero_byte(timestamp=ts_data)
        tomb_file_path = os.path.join(self.disk_file._datadir,
                                      '%s.ts' % ts_tomb.internal)
        with open(tomb_file_path, 'wb') as fd:
            write_metadata(fd, {'X-Timestamp': ts_tomb.internal})
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(2, len(files))
        self.assertTrue(os.path.basename(tomb_file_path) in files, files)
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.exists(self.disk_file._datadir))

    def test_sleeper(self):
        with mock.patch('time.sleep', mock.MagicMock()) as mock_sleep:
            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(30)

            my_conf = dict(interval=2)
            my_conf.update(self.conf)
            my_auditor = auditor.ObjectAuditor(my_conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(2)

            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor.interval = 2
            my_auditor._sleep()
            mock_sleep.assert_called_with(2)

    def test_run_parallel_audit(self):

        class StopForever(Exception):
            pass

        class Bogus(Exception):
            pass
        loop_error = Bogus('exception')

        class LetMeOut(BaseException):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if 'zero_byte_fps' in kwargs:
                    self.check_device_dir = kwargs.get('device_dirs')

            def mock_sleep_stop(self):
                raise StopForever('stop')

            def mock_sleep_continue(self):
                return

            def mock_audit_loop_error(self, parent, zbo_fps,
                                      override_devices=None, **kwargs):
                raise loop_error

            def mock_fork(self):
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, 'sd%s' % i))

        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89,
                                                concurrency=1))

        mocker = ObjectAuditorMock()
        my_auditor.logger.exception = mock.MagicMock()
        real_audit_loop = my_auditor.audit_loop
        my_auditor.audit_loop = mocker.mock_audit_loop_error
        my_auditor.run_audit = mocker.mock_run
        was_fork = os.fork
        was_wait = os.wait
        os.fork = mocker.mock_fork
        os.wait = mocker.mock_wait
        try:
            my_auditor._sleep = mocker.mock_sleep_stop
            my_auditor.run_once(zero_byte_fps=50)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: %s', loop_error)
            my_auditor.logger.exception.reset_mock()
            self.assertRaises(StopForever, my_auditor.run_forever)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: %s', loop_error)
            my_auditor.audit_loop = real_audit_loop

            self.assertRaises(StopForever,
                              my_auditor.run_forever, zero_byte_fps=50)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEqual(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_once)
            self.assertEqual(mocker.fork_called, 1)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEqual(mocker.check_device_dir, [])
            self.assertEqual(mocker.check_args, ())

            device_list = ['sd%s' % i for i in string.ascii_letters[2:10]]
            device_string = ','.join(device_list)
            device_string_bogus = device_string + ',bogus'

            mocker.fork_called = 0
            self.assertRaises(SystemExit, my_auditor.run_once,
                              devices=device_string_bogus)
            self.assertEqual(mocker.fork_called, 1)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEqual(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 2 times since the zbf process is forked just
            # once before self._sleep() is called and StopForever is raised
            # Also wait is called just once before StopForever is raised
            self.assertEqual(mocker.fork_called, 2)
            self.assertEqual(mocker.wait_called, 1)

            my_auditor._sleep = mocker.mock_sleep_continue

            my_auditor.audit_loop = works_only_once(my_auditor.audit_loop,
                                                    LetMeOut())

            my_auditor.concurrency = 2
            mocker.fork_called = 0
            mocker.wait_called = 0
            self.assertRaises(LetMeOut, my_auditor.run_forever)
            # Fork is called no. of devices + (no. of devices)/2 + 1 times
            # since zbf process is forked (no. of devices)/2 + 1 times
            no_devices = len(os.listdir(self.devices))
            self.assertEqual(mocker.fork_called,
                             no_devices + no_devices / 2 + 1)
            self.assertEqual(mocker.wait_called,
                             no_devices + no_devices / 2 + 1)
        finally:
            os.fork = was_fork
            os.wait = was_wait

    def test_run_audit_once(self):
        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89,
                                                concurrency=1))

        forked_pids = []
        next_zbf_pid = [2]
        next_normal_pid = [1001]
        outstanding_pids = [[]]

        def fake_fork_child(**kwargs):
            if len(forked_pids) > 10:
                # something's gone horribly wrong
                raise BaseException("forking too much")

            # ZBF pids are all smaller than the normal-audit pids; this way
            # we can return them first.
            #
            # Also, ZBF pids are even and normal-audit pids are odd; this is
            # so humans seeing this test fail can better tell what's
            # happening.
            if kwargs.get('zero_byte_fps'):
                pid = next_zbf_pid[0]
                next_zbf_pid[0] += 2
            else:
                pid = next_normal_pid[0]
                next_normal_pid[0] += 2
            outstanding_pids[0].append(pid)
            forked_pids.append(pid)
            return pid

        def fake_os_wait():
            # Smallest pid first; that's ZBF if we have one, else normal
            outstanding_pids[0].sort()
            pid = outstanding_pids[0].pop(0)
            return (pid, 0)  # (pid, status)

        with mock.patch("swift.obj.auditor.os.wait", fake_os_wait), \
                mock.patch.object(my_auditor, 'fork_child',
                                  fake_fork_child), \
                mock.patch.object(my_auditor, '_sleep', lambda *a: None):
            my_auditor.run_once()

        self.assertEqual(sorted(forked_pids), [2, 1001])

    def test_run_parallel_audit_once(self):
        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices, mount_check='false',
                 zero_byte_files_per_second=89, concurrency=2))

        # ZBF pids are smaller than the normal-audit pids; this way we can
        # return them first from our mocked os.wait().
        #
        # Also, ZBF pids are even and normal-audit pids are odd; this is so
        # humans seeing this test fail can better tell what's happening.
        forked_pids = []
        next_zbf_pid = [2]
        next_normal_pid = [1001]
        outstanding_pids = [[]]

        def fake_fork_child(**kwargs):
            if len(forked_pids) > 10:
                # something's gone horribly wrong; try not to hang the test
                # run because of it
                raise BaseException("forking too much")

            if kwargs.get('zero_byte_fps'):
                pid = next_zbf_pid[0]
                next_zbf_pid[0] += 2
            else:
                pid = next_normal_pid[0]
                next_normal_pid[0] += 2
            outstanding_pids[0].append(pid)
            forked_pids.append(pid)
            return pid

        def fake_os_wait():
            if not outstanding_pids[0]:
                raise BaseException("nobody waiting")

            # ZBF auditor finishes first
            outstanding_pids[0].sort()
            pid = outstanding_pids[0].pop(0)
            return (pid, 0)  # (pid, status)

        # make sure we've got enough devs that the ZBF auditor can finish
        # before all the normal auditors have been started
        mkdirs(os.path.join(self.devices, 'sdc'))
        mkdirs(os.path.join(self.devices, 'sdd'))

        with mock.patch("swift.obj.auditor.os.wait", fake_os_wait), \
                mock.patch.object(my_auditor, 'fork_child',
                                  fake_fork_child), \
                mock.patch.object(my_auditor, '_sleep', lambda *a: None):
            my_auditor.run_once()

        self.assertEqual(sorted(forked_pids), [2, 1001, 1003, 1005, 1007])
class TestAuditor(unittest.TestCase): def setUp(self): self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor') self.devices = os.path.join(self.testdir, 'node') self.logger = FakeLogger() rmtree(self.testdir, ignore_errors=1) mkdirs(os.path.join(self.devices, 'sda')) self.objects = os.path.join(self.devices, 'sda', 'objects') os.mkdir(os.path.join(self.devices, 'sdb')) self.objects_2 = os.path.join(self.devices, 'sdb', 'objects') os.mkdir(self.objects) self.parts = {} for part in ['0', '1', '2', '3']: self.parts[part] = os.path.join(self.objects, part) os.mkdir(os.path.join(self.objects, part)) self.conf = dict( devices=self.devices, mount_check='false', object_size_stats='10,100,1024,10240') self.df_mgr = DiskFileManager(self.conf, self.logger) self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o') def tearDown(self): rmtree(os.path.dirname(self.testdir), ignore_errors=1) unit.xattr_data = {} def test_object_audit_extra_data(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger) data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() timestamp = str(normalize_timestamp(time.time())) metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) pre_quarantines = auditor_worker.quarantines auditor_worker.object_audit( os.path.join(self.disk_file._datadir, timestamp + '.data'), 'sda', '0') self.assertEquals(auditor_worker.quarantines, pre_quarantines) os.write(writer._fd, 'extra_data') auditor_worker.object_audit( os.path.join(self.disk_file._datadir, timestamp + '.data'), 'sda', '0') self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_diff_data(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger) data = '0' * 1024 etag = md5() timestamp = str(normalize_timestamp(time.time())) with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) pre_quarantines = auditor_worker.quarantines # remake so it will have metadata self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o') auditor_worker.object_audit( os.path.join(self.disk_file._datadir, timestamp + '.data'), 'sda', '0') self.assertEquals(auditor_worker.quarantines, pre_quarantines) etag = md5() etag.update('1' + '0' * 1023) etag = etag.hexdigest() metadata['ETag'] = etag with self.disk_file.create() as writer: writer.write(data) writer.put(metadata) auditor_worker.object_audit( os.path.join(self.disk_file._datadir, timestamp + '.data'), 'sda', '0') self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_no_meta(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + '.data') mkdirs(self.disk_file._datadir) fp = open(path, 'w') fp.write('0' * 1024) fp.close() invalidate_hash(os.path.dirname(self.disk_file._datadir)) auditor_worker = auditor.AuditorWorker(self.conf, self.logger) pre_quarantines = auditor_worker.quarantines auditor_worker.object_audit( os.path.join(self.disk_file._datadir, timestamp + '.data'), 'sda', '0') self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_will_not_swallow_errors_in_tests(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, 
timestamp + '.data') mkdirs(self.disk_file._datadir) with open(path, 'w') as f: write_metadata(f, {'name': '/a/c/o'}) auditor_worker = auditor.AuditorWorker(self.conf, self.logger) def blowup(*args): raise NameError('tpyo') with mock.patch('swift.obj.diskfile.DiskFile', blowup): self.assertRaises(NameError, auditor_worker.object_audit, path, 'sda', '0') def test_failsafe_object_audit_will_swallow_errors_in_tests(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + '.data') mkdirs(self.disk_file._datadir) with open(path, 'w') as f: write_metadata(f, {'name': '/a/c/o'}) auditor_worker = auditor.AuditorWorker(self.conf, self.logger) def blowup(*args): raise NameError('tpyo') with mock.patch('swift.obj.diskfile.DiskFile', blowup): auditor_worker.failsafe_object_audit(path, 'sda', '0') self.assertEquals(auditor_worker.errors, 1) def test_generic_exception_handling(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger) timestamp = str(normalize_timestamp(time.time())) pre_errors = auditor_worker.errors data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) with mock.patch('swift.obj.diskfile.DiskFile', lambda *_: 1 / 0): auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.errors, pre_errors + 1) def test_object_run_once_pass(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger) auditor_worker.log_time = 0 timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.quarantines, pre_quarantines) self.assertEquals(auditor_worker.stats_buckets[1024], 1) self.assertEquals(auditor_worker.stats_buckets[10240], 0) def test_object_run_once_no_sda(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger) timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) os.write(writer._fd, 'extra_data') auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_run_once_multi_devices(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger) timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines data = '0' * 10 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) auditor_worker.audit_all_objects() self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob') data = '1' * 10 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 
'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) os.write(writer._fd, 'extra_data') auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_run_fast_track_non_zero(self): self.auditor = auditor.ObjectAuditor(self.conf) self.auditor.log_time = 0 data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': str(normalize_timestamp(time.time())), 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) etag = md5() etag.update('1' + '0' * 1023) etag = etag.hexdigest() metadata['ETag'] = etag write_metadata(writer._fd, metadata) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.auditor.run_once(zero_byte_fps=50) self.assertFalse(os.path.isdir(quarantine_path)) self.auditor.run_once() self.assertTrue(os.path.isdir(quarantine_path)) def setup_bad_zero_byte(self, with_ts=False): self.auditor = auditor.ObjectAuditor(self.conf) self.auditor.log_time = 0 ts_file_path = '' if with_ts: name_hash = hash_path('a', 'c', 'o') dir_path = os.path.join( self.devices, 'sda', storage_directory(DATADIR, '0', name_hash)) ts_file_path = os.path.join(dir_path, '99999.ts') if not os.path.exists(dir_path): mkdirs(dir_path) fp = open(ts_file_path, 'w') fp.close() etag = md5() with self.disk_file.create() as writer: etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': str(normalize_timestamp(time.time())), 'Content-Length': 10, } writer.put(metadata) etag = md5() etag = etag.hexdigest() metadata['ETag'] = etag write_metadata(writer._fd, metadata) return ts_file_path def test_object_run_fast_track_all(self): self.setup_bad_zero_byte() self.auditor.run_once() quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.assertTrue(os.path.isdir(quarantine_path)) def test_object_run_fast_track_zero(self): self.setup_bad_zero_byte() self.auditor.run_once(zero_byte_fps=50) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.assertTrue(os.path.isdir(quarantine_path)) def test_object_run_fast_track_zero_check_closed(self): rat = [False] class FakeFile(DiskFile): def _quarantine(self, data_file, msg): rat[0] = True DiskFile._quarantine(self, data_file, msg) self.setup_bad_zero_byte() was_df = auditor.diskfile.DiskFile try: auditor.diskfile.DiskFile = FakeFile self.auditor.run_once(zero_byte_fps=50) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.assertTrue(os.path.isdir(quarantine_path)) self.assertTrue(rat[0]) finally: auditor.diskfile.DiskFile = was_df def test_with_tombstone(self): ts_file_path = self.setup_bad_zero_byte(with_ts=True) self.auditor.run_once() self.assertTrue(ts_file_path.endswith('ts')) self.assertTrue(os.path.exists(ts_file_path)) def test_sleeper(self): auditor.SLEEP_BETWEEN_AUDITS = 0.10 my_auditor = auditor.ObjectAuditor(self.conf) start = time.time() my_auditor._sleep() delta_t = time.time() - start self.assert_(delta_t > 0.08) self.assert_(delta_t < 0.12) def test_run_forever(self): class StopForever(Exception): pass class ObjectAuditorMock(object): check_args = () check_kwargs = {} fork_called = 0 fork_res = 0 def mock_run(self, *args, **kwargs): self.check_args = args self.check_kwargs = kwargs def mock_sleep(self): raise StopForever('stop') def mock_fork(self): self.fork_called += 1 return self.fork_res my_auditor = 
auditor.ObjectAuditor(dict(devices=self.devices, mount_check='false', zero_byte_files_per_second=89)) mocker = ObjectAuditorMock() my_auditor.run_once = mocker.mock_run my_auditor._sleep = mocker.mock_sleep was_fork = os.fork try: os.fork = mocker.mock_fork self.assertRaises(StopForever, my_auditor.run_forever, zero_byte_fps=50) self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 50) self.assertEquals(mocker.fork_called, 0) self.assertRaises(StopForever, my_auditor.run_forever) self.assertEquals(mocker.fork_called, 1) self.assertEquals(mocker.check_args, ()) mocker.fork_res = 1 self.assertRaises(StopForever, my_auditor.run_forever) self.assertEquals(mocker.fork_called, 2) self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89) finally: os.fork = was_fork
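Each failure test above follows the same recipe: write a .data file whose xattr metadata disagrees with its on-disk bytes, then assert that the worker's quarantine counter advances. A minimal, self-contained sketch of the integrity check being exercised (hypothetical check_object helper; the real AuditorWorker streams through the DiskFile machinery and quarantines rather than returning a reason):

import hashlib
import os

def check_object(data_path, metadata):
    """Return None if the object looks intact, else a reason string."""
    size = os.stat(data_path).st_size
    if str(size) != metadata.get('Content-Length'):
        return 'size mismatch'
    checksum = hashlib.md5()
    with open(data_path, 'rb') as fp:
        # Read in chunks so large objects need not fit in memory.
        for chunk in iter(lambda: fp.read(65536), ''):
            checksum.update(chunk)
    if checksum.hexdigest() != metadata.get('ETag'):
        return 'etag mismatch'
    return None

This mirrors why test_object_audit_extra_data quarantines: appending 'extra_data' leaves the stored Content-Length and ETag stale, so both checks fail.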
class ObjectController(BaseStorageServer): """Implements the WSGI application for the Swift Object Server.""" def __init__(self, conf, logger=None): """ Creates a new WSGI application for the Swift Object Server. An example configuration is given at <source-dir>/etc/object-server.conf-sample or /etc/swift/object-server.conf-sample. """ super(ObjectController, self).__init__(conf) self.logger = logger or get_logger(conf, log_route="object-server") self.node_timeout = int(conf.get("node_timeout", 3)) self.conn_timeout = float(conf.get("conn_timeout", 0.5)) self.client_timeout = int(conf.get("client_timeout", 60)) self.disk_chunk_size = int(conf.get("disk_chunk_size", 65536)) self.network_chunk_size = int(conf.get("network_chunk_size", 65536)) self.log_requests = config_true_value(conf.get("log_requests", "true")) self.max_upload_time = int(conf.get("max_upload_time", 86400)) self.slow = int(conf.get("slow", 0)) self.keep_cache_private = config_true_value(conf.get("keep_cache_private", "false")) default_allowed_headers = """ content-disposition, content-encoding, x-delete-at, x-object-manifest, x-static-large-object, """ extra_allowed_headers = [ header.strip().lower() for header in conf.get("allowed_headers", default_allowed_headers).split(",") if header.strip() ] self.allowed_headers = set() for header in extra_allowed_headers: if header not in DATAFILE_SYSTEM_META: self.allowed_headers.add(header) self.auto_create_account_prefix = conf.get("auto_create_account_prefix") or "." self.expiring_objects_account = self.auto_create_account_prefix + ( conf.get("expiring_objects_account_name") or "expiring_objects" ) self.expiring_objects_container_divisor = int(conf.get("expiring_objects_container_divisor") or 86400) # Initialization was successful, so now apply the network chunk size # parameter as the default read / write buffer size for the network # sockets. # # NOTE WELL: This is a class setting, so until we can set this on a # per-connection basis, this affects reading and writing on ALL # sockets, those between the proxy servers and external clients, and # those between the proxy servers and the other internal servers. # # ** Because the primary motivation for this is to optimize how data # is written back to the proxy server, we could use the value from the # disk_chunk_size parameter. However, it affects all created sockets # using this class so we have chosen to tie it to the # network_chunk_size parameter value instead. socket._fileobject.default_bufsize = self.network_chunk_size # Provide further setup specific to an object server implementation. self.setup(conf) def setup(self, conf): """ Implementation specific setup. This method is called at the very end by the constructor to allow a specific implementation to modify existing attributes or add its own attributes. :param conf: WSGI configuration parameter """ # Common on-disk hierarchy shared across account, container and object # servers. self._diskfile_mgr = DiskFileManager(conf, self.logger) # This is populated by global_conf_callback way below as the semaphore # is shared by all workers. 
if "replication_semaphore" in conf: # The value was put in a list so it could get past paste self.replication_semaphore = conf["replication_semaphore"][0] else: self.replication_semaphore = None self.replication_failure_threshold = int(conf.get("replication_failure_threshold") or 100) self.replication_failure_ratio = float(conf.get("replication_failure_ratio") or 1.0) def get_diskfile(self, device, partition, account, container, obj, policy_idx, **kwargs): """ Utility method for instantiating a DiskFile object supporting a given REST API. An implementation of the object server that wants to use a different DiskFile class would simply over-ride this method to provide that behavior. """ return self._diskfile_mgr.get_diskfile(device, partition, account, container, obj, policy_idx, **kwargs) def async_update( self, op, account, container, obj, host, partition, contdevice, headers_out, objdevice, policy_index ): """ Sends or saves an async update. :param op: operation performed (ex: 'PUT', or 'DELETE') :param account: account name for the object :param container: container name for the object :param obj: object name :param host: host that the container is on :param partition: partition that the container is on :param contdevice: device name that the container is on :param headers_out: dictionary of headers to send in the container request :param objdevice: device name that the object is in :param policy_index: the associated storage policy index """ headers_out["user-agent"] = "object-server %s" % os.getpid() full_path = "/%s/%s/%s" % (account, container, obj) if all([host, partition, contdevice]): try: with ConnectionTimeout(self.conn_timeout): ip, port = host.rsplit(":", 1) conn = http_connect(ip, port, contdevice, partition, op, full_path, headers_out) with Timeout(self.node_timeout): response = conn.getresponse() response.read() if is_success(response.status): return else: self.logger.error( _( "ERROR Container update failed " "(saving for async update later): %(status)d " "response from %(ip)s:%(port)s/%(dev)s" ), {"status": response.status, "ip": ip, "port": port, "dev": contdevice}, ) except (Exception, Timeout): self.logger.exception( _("ERROR container update failed with " "%(ip)s:%(port)s/%(dev)s (saving for async update later)"), {"ip": ip, "port": port, "dev": contdevice}, ) data = {"op": op, "account": account, "container": container, "obj": obj, "headers": headers_out} timestamp = headers_out["x-timestamp"] self._diskfile_mgr.pickle_async_update(objdevice, account, container, obj, data, timestamp, policy_index) def container_update(self, op, account, container, obj, request, headers_out, objdevice, policy_idx): """ Update the container when objects are updated. :param op: operation performed (ex: 'PUT', or 'DELETE') :param account: account name for the object :param container: container name for the object :param obj: object name :param request: the original request object driving the update :param headers_out: dictionary of headers to send in the container request(s) :param objdevice: device name that the object is in """ headers_in = request.headers conthosts = [h.strip() for h in headers_in.get("X-Container-Host", "").split(",")] contdevices = [d.strip() for d in headers_in.get("X-Container-Device", "").split(",")] contpartition = headers_in.get("X-Container-Partition", "") if len(conthosts) != len(contdevices): # This shouldn't happen unless there's a bug in the proxy, # but if there is, we want to know about it. 
self.logger.error( _( "ERROR Container update failed: different " "numbers of hosts and devices in request: " '"%s" vs "%s"' ) % (headers_in.get("X-Container-Host", ""), headers_in.get("X-Container-Device", "")) ) return if contpartition: updates = zip(conthosts, contdevices) else: updates = [] headers_out["x-trans-id"] = headers_in.get("x-trans-id", "-") headers_out["referer"] = request.as_referer() headers_out["X-Backend-Storage-Policy-Index"] = policy_idx for conthost, contdevice in updates: self.async_update( op, account, container, obj, conthost, contpartition, contdevice, headers_out, objdevice, policy_idx ) def delete_at_update(self, op, delete_at, account, container, obj, request, objdevice, policy_index): """ Update the expiring objects container when objects are updated. :param op: operation performed (ex: 'PUT', or 'DELETE') :param delete_at: scheduled delete in UNIX seconds, int :param account: account name for the object :param container: container name for the object :param obj: object name :param request: the original request driving the update :param objdevice: device name that the object is in :param policy_index: the policy index to be used for tmp dir """ if config_true_value(request.headers.get("x-backend-replication", "f")): return delete_at = normalize_delete_at_timestamp(delete_at) updates = [(None, None)] partition = None hosts = contdevices = [None] headers_in = request.headers headers_out = HeaderKeyDict( { # system accounts are always Policy-0 "X-Backend-Storage-Policy-Index": 0, "x-timestamp": request.timestamp.internal, "x-trans-id": headers_in.get("x-trans-id", "-"), "referer": request.as_referer(), } ) if op != "DELETE": delete_at_container = headers_in.get("X-Delete-At-Container", None) if not delete_at_container: self.logger.warning( "X-Delete-At-Container header must be specified for " "expiring objects background %s to work properly. Making " "best guess as to the container name for now." % op ) # TODO(gholt): In a future release, change the above warning to # a raised exception and remove the guess code below. delete_at_container = get_expirer_container( delete_at, self.expiring_objects_container_divisor, account, container, obj ) partition = headers_in.get("X-Delete-At-Partition", None) hosts = headers_in.get("X-Delete-At-Host", "") contdevices = headers_in.get("X-Delete-At-Device", "") updates = [ upd for upd in zip((h.strip() for h in hosts.split(",")), (c.strip() for c in contdevices.split(","))) if all(upd) and partition ] if not updates: updates = [(None, None)] headers_out["x-size"] = "0" headers_out["x-content-type"] = "text/plain" headers_out["x-etag"] = "d41d8cd98f00b204e9800998ecf8427e" else: # DELETEs of old expiration data have no way of knowing what the # old X-Delete-At-Container was at the time of the initial setting # of the data, so a best guess is made here. # Worst case is a DELETE is issued now for something that doesn't # exist there and the original data is left where it is, where # it will be ignored when the expirer eventually tries to issue the # object DELETE later since the X-Delete-At value won't match up. 
delete_at_container = get_expirer_container( delete_at, self.expiring_objects_container_divisor, account, container, obj ) delete_at_container = normalize_delete_at_timestamp(delete_at_container) for host, contdevice in updates: self.async_update( op, self.expiring_objects_account, delete_at_container, "%s-%s/%s/%s" % (delete_at, account, container, obj), host, partition, contdevice, headers_out, objdevice, policy_index, ) @public @timing_stats() def POST(self, request): """Handle HTTP POST requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True) req_timestamp = valid_timestamp(request) new_delete_at = int(request.headers.get("X-Delete-At") or 0) if new_delete_at and new_delete_at < time.time(): return HTTPBadRequest(body="X-Delete-At in past", request=request, content_type="text/plain") try: disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: orig_metadata = disk_file.read_metadata() except DiskFileXattrNotSupported: return HTTPInsufficientStorage(drive=device, request=request) except (DiskFileNotExist, DiskFileQuarantined): return HTTPNotFound(request=request) orig_timestamp = Timestamp(orig_metadata.get("X-Timestamp", 0)) if orig_timestamp >= req_timestamp: return HTTPConflict(request=request, headers={"X-Backend-Timestamp": orig_timestamp.internal}) metadata = {"X-Timestamp": req_timestamp.internal} metadata.update(val for val in request.headers.iteritems() if is_user_meta("object", val[0])) for header_key in self.allowed_headers: if header_key in request.headers: header_caps = header_key.title() metadata[header_caps] = request.headers[header_key] orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0) if orig_delete_at != new_delete_at: if new_delete_at: self.delete_at_update("PUT", new_delete_at, account, container, obj, request, device, policy_idx) if orig_delete_at: self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device, policy_idx) try: disk_file.write_metadata(metadata) except (DiskFileXattrNotSupported, DiskFileNoSpace): return HTTPInsufficientStorage(drive=device, request=request) return HTTPAccepted(request=request) @public @timing_stats() def PUT(self, request): """Handle HTTP PUT requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True) req_timestamp = valid_timestamp(request) error_response = check_object_creation(request, obj) if error_response: return error_response new_delete_at = int(request.headers.get("X-Delete-At") or 0) if new_delete_at and new_delete_at < time.time(): return HTTPBadRequest(body="X-Delete-At in past", request=request, content_type="text/plain") try: fsize = request.message_length() except ValueError as e: return HTTPBadRequest(body=str(e), request=request, content_type="text/plain") try: disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: orig_metadata = disk_file.read_metadata() except DiskFileXattrNotSupported: return HTTPInsufficientStorage(drive=device, request=request) except (DiskFileNotExist, DiskFileQuarantined): orig_metadata = {} # Checks for If-None-Match if request.if_none_match is not None and orig_metadata: if "*" in request.if_none_match: # File 
exists already so return 412 return HTTPPreconditionFailed(request=request) if orig_metadata.get("ETag") in request.if_none_match: # The current ETag matches, so return 412 return HTTPPreconditionFailed(request=request) orig_timestamp = Timestamp(orig_metadata.get("X-Timestamp", 0)) if orig_timestamp >= req_timestamp: return HTTPConflict(request=request, headers={"X-Backend-Timestamp": orig_timestamp.internal}) orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0) upload_expiration = time.time() + self.max_upload_time etag = md5() elapsed_time = 0 try: with disk_file.create(size=fsize) as writer: upload_size = 0 def timeout_reader(): with ChunkReadTimeout(self.client_timeout): return request.environ["wsgi.input"].read(self.network_chunk_size) try: for chunk in iter(lambda: timeout_reader(), ""): start_time = time.time() if start_time > upload_expiration: self.logger.increment("PUT.timeouts") return HTTPRequestTimeout(request=request) etag.update(chunk) upload_size = writer.write(chunk) elapsed_time += time.time() - start_time except ChunkReadTimeout: return HTTPRequestTimeout(request=request) if upload_size: self.logger.transfer_rate("PUT." + device + ".timing", elapsed_time, upload_size) if fsize is not None and fsize != upload_size: return HTTPClientDisconnect(request=request) etag = etag.hexdigest() if "etag" in request.headers and request.headers["etag"].lower() != etag: return HTTPUnprocessableEntity(request=request) metadata = { "X-Timestamp": request.timestamp.internal, "Content-Type": request.headers["content-type"], "ETag": etag, "Content-Length": str(upload_size), } metadata.update(val for val in request.headers.iteritems() if is_sys_or_user_meta("object", val[0])) headers_to_copy = request.headers.get("X-Backend-Replication-Headers", "").split() + list( self.allowed_headers ) for header_key in headers_to_copy: if header_key in request.headers: header_caps = header_key.title() metadata[header_caps] = request.headers[header_key] writer.put(metadata) except (DiskFileXattrNotSupported, DiskFileNoSpace): return HTTPInsufficientStorage(drive=device, request=request) if orig_delete_at != new_delete_at: if new_delete_at: self.delete_at_update("PUT", new_delete_at, account, container, obj, request, device, policy_idx) if orig_delete_at: self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device, policy_idx) self.container_update( "PUT", account, container, obj, request, HeaderKeyDict( { "x-size": metadata["Content-Length"], "x-content-type": metadata["Content-Type"], "x-timestamp": metadata["X-Timestamp"], "x-etag": metadata["ETag"], } ), device, policy_idx, ) return HTTPCreated(request=request, etag=etag) @public @timing_stats() def GET(self, request): """Handle HTTP GET requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True) keep_cache = self.keep_cache_private or ( "X-Auth-Token" not in request.headers and "X-Storage-Token" not in request.headers ) try: disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: with disk_file.open(): metadata = disk_file.get_metadata() obj_size = int(metadata["Content-Length"]) file_x_ts = Timestamp(metadata["X-Timestamp"]) keep_cache = self.keep_cache_private or ( "X-Auth-Token" not in request.headers and "X-Storage-Token" not in request.headers ) response = Response( 
app_iter=disk_file.reader(keep_cache=keep_cache), request=request, conditional_response=True ) response.headers["Content-Type"] = metadata.get("Content-Type", "application/octet-stream") for key, value in metadata.iteritems(): if is_sys_or_user_meta("object", key) or key.lower() in self.allowed_headers: response.headers[key] = value response.etag = metadata["ETag"] response.last_modified = math.ceil(float(file_x_ts)) response.content_length = obj_size try: response.content_encoding = metadata["Content-Encoding"] except KeyError: pass response.headers["X-Timestamp"] = file_x_ts.normal response.headers["X-Backend-Timestamp"] = file_x_ts.internal resp = request.get_response(response) except DiskFileXattrNotSupported: return HTTPInsufficientStorage(drive=device, request=request) except (DiskFileNotExist, DiskFileQuarantined) as e: headers = {} if hasattr(e, "timestamp"): headers["X-Backend-Timestamp"] = e.timestamp.internal resp = HTTPNotFound(request=request, headers=headers, conditional_response=True) return resp @public @timing_stats(sample_rate=0.8) def HEAD(self, request): """Handle HTTP HEAD requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True) try: disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: metadata = disk_file.read_metadata() except DiskFileXattrNotSupported: return HTTPInsufficientStorage(drive=device, request=request) except (DiskFileNotExist, DiskFileQuarantined) as e: headers = {} if hasattr(e, "timestamp"): headers["X-Backend-Timestamp"] = e.timestamp.internal return HTTPNotFound(request=request, headers=headers, conditional_response=True) response = Response(request=request, conditional_response=True) response.headers["Content-Type"] = metadata.get("Content-Type", "application/octet-stream") for key, value in metadata.iteritems(): if is_sys_or_user_meta("object", key) or key.lower() in self.allowed_headers: response.headers[key] = value response.etag = metadata["ETag"] ts = Timestamp(metadata["X-Timestamp"]) response.last_modified = math.ceil(float(ts)) # Needed for container sync feature response.headers["X-Timestamp"] = ts.normal response.headers["X-Backend-Timestamp"] = ts.internal response.content_length = int(metadata["Content-Length"]) try: response.content_encoding = metadata["Content-Encoding"] except KeyError: pass return response @public @timing_stats() def DELETE(self, request): """Handle HTTP DELETE requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True) req_timestamp = valid_timestamp(request) try: disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: orig_metadata = disk_file.read_metadata() except DiskFileXattrNotSupported: return HTTPInsufficientStorage(drive=device, request=request) except DiskFileExpired as e: orig_timestamp = e.timestamp orig_metadata = e.metadata response_class = HTTPNotFound except DiskFileDeleted as e: orig_timestamp = e.timestamp orig_metadata = {} response_class = HTTPNotFound except (DiskFileNotExist, DiskFileQuarantined): orig_timestamp = 0 orig_metadata = {} response_class = HTTPNotFound else: orig_timestamp = Timestamp(orig_metadata.get("X-Timestamp", 0)) if 
orig_timestamp < req_timestamp: response_class = HTTPNoContent else: response_class = HTTPConflict response_timestamp = max(orig_timestamp, req_timestamp) orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0) try: req_if_delete_at_val = request.headers["x-if-delete-at"] req_if_delete_at = int(req_if_delete_at_val) except KeyError: pass except ValueError: return HTTPBadRequest(request=request, body="Bad X-If-Delete-At header value") else: # request includes x-if-delete-at; we must not place a tombstone # if we can not verify the x-if-delete-at time if not orig_timestamp: # no object found at all return HTTPNotFound() if orig_delete_at != req_if_delete_at: return HTTPPreconditionFailed(request=request, body="X-If-Delete-At and X-Delete-At do not match") else: # differentiate success from no object at all response_class = HTTPNoContent if orig_delete_at: self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device, policy_idx) if orig_timestamp < req_timestamp: disk_file.delete(req_timestamp) self.container_update( "DELETE", account, container, obj, request, HeaderKeyDict({"x-timestamp": req_timestamp.internal}), device, policy_idx, ) return response_class(request=request, headers={"X-Backend-Timestamp": response_timestamp.internal}) @public @replication @timing_stats(sample_rate=0.1) def REPLICATE(self, request): """ Handle REPLICATE requests for the Swift Object Server. This is used by the object replicator to get hashes for directories. """ device, partition, suffix, policy_idx = get_name_and_placement(request, 2, 3, True) try: hashes = self._diskfile_mgr.get_hashes(device, partition, suffix, policy_idx) except DiskFileDeviceUnavailable: resp = HTTPInsufficientStorage(drive=device, request=request) else: resp = Response(body=pickle.dumps(hashes)) return resp @public @replication @timing_stats(sample_rate=0.1) def REPLICATION(self, request): return Response(app_iter=ssync_receiver.Receiver(self, request)()) def __call__(self, env, start_response): """WSGI Application entry point for the Swift Object Server.""" start_time = time.time() req = Request(env) self.logger.txn_id = req.headers.get("x-trans-id", None) if not check_utf8(req.path_info): res = HTTPPreconditionFailed(body="Invalid UTF8 or contains NULL") else: try: # disallow methods which have not been marked 'public' try: if req.method not in self.allowed_methods: raise AttributeError("Not allowed method.") except AttributeError: res = HTTPMethodNotAllowed() else: method = getattr(self, req.method) res = method(req) except DiskFileCollision: res = HTTPForbidden(request=req) except HTTPException as error_response: res = error_response except (Exception, Timeout): self.logger.exception( _("ERROR __call__ error with %(method)s" " %(path)s "), {"method": req.method, "path": req.path} ) res = HTTPInternalServerError(body=traceback.format_exc()) trans_time = time.time() - start_time if self.log_requests: log_line = get_log_line(req, res, trans_time, "") if req.method in ("REPLICATE", "REPLICATION") or "X-Backend-Replication" in req.headers: self.logger.debug(log_line) else: self.logger.info(log_line) if req.method in ("PUT", "DELETE"): slow = self.slow - trans_time if slow > 0: sleep(slow) # To be able to zero-copy send the object, we need a few things. # First, we have to be responding successfully to a GET, or else we're # not sending the object. Second, we have to be able to extract the # socket file descriptor from the WSGI input object. Third, the # diskfile has to support zero-copy send. 
# # There's a good chance that this could work for 206 responses too, # but the common case is sending the whole object, so we'll start # there. if req.method == "GET" and res.status_int == 200 and isinstance(env["wsgi.input"], wsgi.Input): app_iter = getattr(res, "app_iter", None) checker = getattr(app_iter, "can_zero_copy_send", None) if checker and checker(): # For any kind of zero-copy thing like sendfile or splice, we # need the file descriptor. Eventlet doesn't provide a clean # way of getting that, so we resort to this. wsock = env["wsgi.input"].get_socket() wsockfd = wsock.fileno() # Don't call zero_copy_send() until after we force the HTTP # headers out of Eventlet and into the socket. def zero_copy_iter(): # If possible, set TCP_CORK so that headers don't # immediately go on the wire, but instead, wait for some # response body to make the TCP frames as large as # possible (and hence as few packets as possible). # # On non-Linux systems, we might consider TCP_NODELAY, but # since the only known zero-copy-capable diskfile uses # Linux-specific syscalls, we'll defer that work until # someone needs it. if hasattr(socket, "TCP_CORK"): wsock.setsockopt(socket.IPPROTO_TCP, socket.TCP_CORK, 1) yield EventletPlungerString() try: app_iter.zero_copy_send(wsockfd) except Exception: self.logger.exception("zero_copy_send() blew up") raise yield "" # Get headers ready to go out res(env, start_response) return zero_copy_iter() else: return res(env, start_response) else: return res(env, start_response)
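The zero-copy branch above only hands the socket's file descriptor to the diskfile after forcing the HTTP headers out through Eventlet; TCP_CORK is what lets the headers and the first body bytes share TCP frames. The corking idea in isolation, as a sketch with plain blocking sockets (Python 3's os.sendfile is used here for brevity; the server above instead delegates to the diskfile's own zero_copy_send):

import os
import socket

def corked_sendfile(sock, header_bytes, body_fd, length):
    # Cork: hold small writes back so headers wait for body bytes.
    if hasattr(socket, 'TCP_CORK'):
        sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_CORK, 1)
    try:
        sock.sendall(header_bytes)
        offset = 0
        while offset < length:
            # sendfile() moves bytes kernel-side, no userspace copy.
            offset += os.sendfile(sock.fileno(), body_fd, offset,
                                  length - offset)
    finally:
        # Uncork so anything still buffered is flushed immediately.
        if hasattr(socket, 'TCP_CORK'):
            sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_CORK, 0)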
class RestoreMiddleware(object): def __init__(self, app, conf, *args, **kwargs): self.app = app self.conf = conf self.logger = get_logger(self.conf, log_route='restore') self._diskfile_mgr = DiskFileManager(conf, self.logger) def __call__(self, env, start_response): req = Request(env) if (req.method == 'PUT') or (req.method == 'POST'): if 'X-Object-Meta-S3-Restored' in req.headers: return self.save_object(env)(env, start_response) if 'X-Object-Meta-S3-Restore' in req.headers: return self.set_restoring(env)(env, start_response) return self.app(env, start_response) def _split_request_path(self, req): self.device, self.partition, self.account, self.container, \ self.obj = split_and_validate_path(req, 5, 5, True) def save_object(self, env): # Called by the Restorer daemon req = Request(env) self._split_request_path(req) try: disk_file = self.get_diskfile(self.device, self.partition, self.account, self.container, self.obj) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=self.device, request=Request(env)) ori_meta = disk_file.read_metadata() metadata = {} metadata.update(val for val in req.headers.iteritems() if is_user_meta('object', val[0])) del metadata['X-Object-Meta-S3-Restored'] # Preserve the original X-Timestamp metadata['X-Timestamp'] = ori_meta['X-Timestamp'] metadata['Content-Type'] = ori_meta['Content-Type'] fsize = req.message_length() etag = md5() try: with disk_file.create(size=fsize) as writer: def timeout_reader(): with ChunkReadTimeout(60): return req.environ['wsgi.input'].read(65536) try: for chunk in iter(lambda: timeout_reader(), ''): etag.update(chunk) writer.write(chunk) except ChunkReadTimeout: return HTTPRequestTimeout(request=req) etag = etag.hexdigest() metadata['ETag'] = etag metadata['Content-Length'] = str(fsize) writer.put(metadata) except DiskFileNoSpace: return HTTPInsufficientStorage(drive=self.device, request=req) return HTTPCreated(request=req, etag=etag) def set_restoring(self, env): # Called when the Lifecycle middleware flags an object as being restored req = Request(env) self._split_request_path(req) try: disk_file = self.get_diskfile(self.device, self.partition, self.account, self.container, self.obj) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=self.device, request=Request(env)) ori_meta = disk_file.read_metadata() metadata = ori_meta metadata.update(val for val in req.headers.iteritems() if is_user_meta('object', val[0])) # Preserve the original X-Timestamp with disk_file.create(size=0) as writer: writer.put(metadata) return HTTPCreated(request=req, etag=ori_meta['ETag']) def get_diskfile(self, device, partition, account, container, obj, **kwargs): return self._diskfile_mgr.get_diskfile(device, partition, account, container, obj, **kwargs)
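RestoreMiddleware ships no paste glue here; to sit in the object server's WSGI pipeline it would conventionally need a filter factory along these lines (an assumed sketch, not part of the source above, and the 'restore' entry point name is made up):

def filter_factory(global_conf, **local_conf):
    conf = global_conf.copy()
    conf.update(local_conf)

    def restore_filter(app):
        return RestoreMiddleware(app, conf)
    return restore_filter

# object-server.conf would then reference it, for example:
#   [pipeline:main]
#   pipeline = restore object-server
#   [filter:restore]
#   use = egg:swift#restore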
class TestAuditor(unittest.TestCase): def setUp(self): self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor') self.devices = os.path.join(self.testdir, 'node') self.logger = FakeLogger() rmtree(self.testdir, ignore_errors=1) mkdirs(os.path.join(self.devices, 'sda')) self.objects = os.path.join(self.devices, 'sda', 'objects') os.mkdir(os.path.join(self.devices, 'sdb')) self.objects_2 = os.path.join(self.devices, 'sdb', 'objects') os.mkdir(self.objects) self.parts = {} for part in ['0', '1', '2', '3']: self.parts[part] = os.path.join(self.objects, part) os.mkdir(os.path.join(self.objects, part)) self.conf = dict(devices=self.devices, mount_check='false', object_size_stats='10,100,1024,10240') self.df_mgr = DiskFileManager(self.conf, self.logger) self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o') def tearDown(self): rmtree(os.path.dirname(self.testdir), ignore_errors=1) unit.xattr_data = {} def test_object_audit_extra_data(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger) data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() timestamp = str(normalize_timestamp(time.time())) metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) pre_quarantines = auditor_worker.quarantines auditor_worker.object_audit( AuditLocation(self.disk_file._datadir, 'sda', '0')) self.assertEquals(auditor_worker.quarantines, pre_quarantines) os.write(writer._fd, 'extra_data') auditor_worker.object_audit( AuditLocation(self.disk_file._datadir, 'sda', '0')) self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_diff_data(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger) data = '0' * 1024 etag = md5() timestamp = str(normalize_timestamp(time.time())) with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) pre_quarantines = auditor_worker.quarantines # remake so it will have metadata self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o') auditor_worker.object_audit( AuditLocation(self.disk_file._datadir, 'sda', '0')) self.assertEquals(auditor_worker.quarantines, pre_quarantines) etag = md5() etag.update('1' + '0' * 1023) etag = etag.hexdigest() metadata['ETag'] = etag with self.disk_file.create() as writer: writer.write(data) writer.put(metadata) auditor_worker.object_audit( AuditLocation(self.disk_file._datadir, 'sda', '0')) self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_no_meta(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + '.data') mkdirs(self.disk_file._datadir) fp = open(path, 'w') fp.write('0' * 1024) fp.close() invalidate_hash(os.path.dirname(self.disk_file._datadir)) auditor_worker = auditor.AuditorWorker(self.conf, self.logger) pre_quarantines = auditor_worker.quarantines auditor_worker.object_audit( AuditLocation(self.disk_file._datadir, 'sda', '0')) self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_will_not_swallow_errors_in_tests(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + '.data') mkdirs(self.disk_file._datadir) with open(path, 'w') as f: write_metadata(f, 
{'name': '/a/c/o'}) auditor_worker = auditor.AuditorWorker(self.conf, self.logger) def blowup(*args): raise NameError('tpyo') with mock.patch.object(DiskFileManager, 'get_diskfile_from_audit_location', blowup): self.assertRaises(NameError, auditor_worker.object_audit, AuditLocation(os.path.dirname(path), 'sda', '0')) def test_failsafe_object_audit_will_swallow_errors_in_tests(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + '.data') mkdirs(self.disk_file._datadir) with open(path, 'w') as f: write_metadata(f, {'name': '/a/c/o'}) auditor_worker = auditor.AuditorWorker(self.conf, self.logger) def blowup(*args): raise NameError('tpyo') with mock.patch('swift.obj.diskfile.DiskFile', blowup): auditor_worker.failsafe_object_audit( AuditLocation(os.path.dirname(path), 'sda', '0')) self.assertEquals(auditor_worker.errors, 1) def test_generic_exception_handling(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger) timestamp = str(normalize_timestamp(time.time())) pre_errors = auditor_worker.errors data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) with mock.patch('swift.obj.diskfile.DiskFile', lambda *_: 1 / 0): auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.errors, pre_errors + 1) def test_object_run_once_pass(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger) auditor_worker.log_time = 0 timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.quarantines, pre_quarantines) self.assertEquals(auditor_worker.stats_buckets[1024], 1) self.assertEquals(auditor_worker.stats_buckets[10240], 0) def test_object_run_once_no_sda(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger) timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) os.write(writer._fd, 'extra_data') auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_run_once_multi_devices(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger) timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines data = '0' * 10 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) auditor_worker.audit_all_objects() self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob') data = '1' * 10 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 
'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) os.write(writer._fd, 'extra_data') auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_run_fast_track_non_zero(self): self.auditor = auditor.ObjectAuditor(self.conf) self.auditor.log_time = 0 data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': str(normalize_timestamp(time.time())), 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) etag = md5() etag.update('1' + '0' * 1023) etag = etag.hexdigest() metadata['ETag'] = etag write_metadata(writer._fd, metadata) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.auditor.run_once(zero_byte_fps=50) self.assertFalse(os.path.isdir(quarantine_path)) self.auditor.run_once() self.assertTrue(os.path.isdir(quarantine_path)) def setup_bad_zero_byte(self, with_ts=False): self.auditor = auditor.ObjectAuditor(self.conf) self.auditor.log_time = 0 ts_file_path = '' if with_ts: name_hash = hash_path('a', 'c', 'o') dir_path = os.path.join(self.devices, 'sda', storage_directory(DATADIR, '0', name_hash)) ts_file_path = os.path.join(dir_path, '99999.ts') if not os.path.exists(dir_path): mkdirs(dir_path) fp = open(ts_file_path, 'w') write_metadata(fp, {'X-Timestamp': '99999', 'name': '/a/c/o'}) fp.close() etag = md5() with self.disk_file.create() as writer: etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': str(normalize_timestamp(time.time())), 'Content-Length': 10, } writer.put(metadata) etag = md5() etag = etag.hexdigest() metadata['ETag'] = etag write_metadata(writer._fd, metadata) return ts_file_path def test_object_run_fast_track_all(self): self.setup_bad_zero_byte() self.auditor.run_once() quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.assertTrue(os.path.isdir(quarantine_path)) def test_object_run_fast_track_zero(self): self.setup_bad_zero_byte() self.auditor.run_once(zero_byte_fps=50) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.assertTrue(os.path.isdir(quarantine_path)) def test_object_run_fast_track_zero_check_closed(self): rat = [False] class FakeFile(DiskFile): def _quarantine(self, data_file, msg): rat[0] = True DiskFile._quarantine(self, data_file, msg) self.setup_bad_zero_byte() was_df = auditor.diskfile.DiskFile try: auditor.diskfile.DiskFile = FakeFile self.auditor.run_once(zero_byte_fps=50) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.assertTrue(os.path.isdir(quarantine_path)) self.assertTrue(rat[0]) finally: auditor.diskfile.DiskFile = was_df def test_with_tombstone(self): ts_file_path = self.setup_bad_zero_byte(with_ts=True) self.assertTrue(ts_file_path.endswith('ts')) self.auditor.run_once() self.assertTrue(os.path.exists(ts_file_path)) def test_sleeper(self): auditor.SLEEP_BETWEEN_AUDITS = 0.10 my_auditor = auditor.ObjectAuditor(self.conf) start = time.time() my_auditor._sleep() delta_t = time.time() - start self.assert_(delta_t > 0.08) self.assert_(delta_t < 0.12) def test_run_forever(self): class StopForever(Exception): pass class ObjectAuditorMock(object): check_args = () check_kwargs = {} fork_called = 0 fork_res = 0 def mock_run(self, *args, **kwargs): self.check_args = args self.check_kwargs = kwargs def mock_sleep(self): raise StopForever('stop') def mock_fork(self): self.fork_called += 1 return 
self.fork_res my_auditor = auditor.ObjectAuditor( dict(devices=self.devices, mount_check='false', zero_byte_files_per_second=89)) mocker = ObjectAuditorMock() my_auditor.run_once = mocker.mock_run my_auditor._sleep = mocker.mock_sleep was_fork = os.fork try: os.fork = mocker.mock_fork self.assertRaises(StopForever, my_auditor.run_forever, zero_byte_fps=50) self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 50) self.assertEquals(mocker.fork_called, 0) self.assertRaises(StopForever, my_auditor.run_forever) self.assertEquals(mocker.fork_called, 1) self.assertEquals(mocker.check_args, ()) mocker.fork_res = 1 self.assertRaises(StopForever, my_auditor.run_forever) self.assertEquals(mocker.fork_called, 2) self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89) finally: os.fork = was_fork
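Relative to the first copy of this suite, object_audit and failsafe_object_audit now take a single AuditLocation instead of a (path, device, partition) triple, and the error-injection test patches DiskFileManager.get_diskfile_from_audit_location rather than the DiskFile class itself. A rough sketch of that value object, with fields inferred from the calls above (the real class lives in swift.obj.diskfile):

class AuditLocation(object):
    """Bundles what the auditor needs to find one object on disk."""

    def __init__(self, path, device, partition):
        self.path = path            # the object's hash directory
        self.device = device        # e.g. 'sda'
        self.partition = partition  # e.g. '0'

    def __str__(self):
        return str(self.path)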
class ObjectController(object): """Implements the WSGI application for the Swift Object Server.""" def __init__(self, conf, logger=None): """ Creates a new WSGI application for the Swift Object Server. An example configuration is given at <source-dir>/etc/object-server.conf-sample or /etc/swift/object-server.conf-sample. """ self.logger = logger or get_logger(conf, log_route="object-server") self.node_timeout = int(conf.get("node_timeout", 3)) self.conn_timeout = float(conf.get("conn_timeout", 0.5)) self.client_timeout = int(conf.get("client_timeout", 60)) self.disk_chunk_size = int(conf.get("disk_chunk_size", 65536)) self.network_chunk_size = int(conf.get("network_chunk_size", 65536)) self.log_requests = config_true_value(conf.get("log_requests", "true")) self.max_upload_time = int(conf.get("max_upload_time", 86400)) self.slow = int(conf.get("slow", 0)) self.keep_cache_private = config_true_value(conf.get("keep_cache_private", "false")) replication_server = conf.get("replication_server", None) if replication_server is not None: replication_server = config_true_value(replication_server) self.replication_server = replication_server default_allowed_headers = """ content-disposition, content-encoding, x-delete-at, x-object-manifest, x-static-large-object, """ extra_allowed_headers = [ header.strip().lower() for header in conf.get("allowed_headers", default_allowed_headers).split(",") if header.strip() ] self.allowed_headers = set() for header in extra_allowed_headers: if header not in DATAFILE_SYSTEM_META: self.allowed_headers.add(header) self.expiring_objects_account = (conf.get("auto_create_account_prefix") or ".") + "expiring_objects" self.expiring_objects_container_divisor = int(conf.get("expiring_objects_container_divisor") or 86400) # Initialization was successful, so now apply the network chunk size # parameter as the default read / write buffer size for the network # sockets. # # NOTE WELL: This is a class setting, so until we can set this on a # per-connection basis, this affects reading and writing on ALL # sockets, those between the proxy servers and external clients, and # those between the proxy servers and the other internal servers. # # ** Because the primary motivation for this is to optimize how data # is written back to the proxy server, we could use the value from the # disk_chunk_size parameter. However, it affects all created sockets # using this class so we have chosen to tie it to the # network_chunk_size parameter value instead. socket._fileobject.default_bufsize = self.network_chunk_size # Provide further setup specific to an object server implementation. self.setup(conf) def setup(self, conf): """ Implementation specific setup. This method is called at the very end by the constructor to allow a specific implementation to modify existing attributes or add its own attributes. :param conf: WSGI configuration parameter """ # Common on-disk hierarchy shared across account, container and object # servers. self._diskfile_mgr = DiskFileManager(conf, self.logger) # This is populated by global_conf_callback way below as the semaphore # is shared by all workers. 
if "replication_semaphore" in conf: # The value was put in a list so it could get past paste self.replication_semaphore = conf["replication_semaphore"][0] else: self.replication_semaphore = None self.replication_failure_threshold = int(conf.get("replication_failure_threshold") or 100) self.replication_failure_ratio = float(conf.get("replication_failure_ratio") or 1.0) def get_diskfile(self, device, partition, account, container, obj, **kwargs): """ Utility method for instantiating a DiskFile object supporting a given REST API. An implementation of the object server that wants to use a different DiskFile class would simply over-ride this method to provide that behavior. """ return self._diskfile_mgr.get_diskfile(device, partition, account, container, obj, **kwargs) def async_update(self, op, account, container, obj, host, partition, contdevice, headers_out, objdevice): """ Sends or saves an async update. :param op: operation performed (ex: 'PUT', or 'DELETE') :param account: account name for the object :param container: container name for the object :param obj: object name :param host: host that the container is on :param partition: partition that the container is on :param contdevice: device name that the container is on :param headers_out: dictionary of headers to send in the container request :param objdevice: device name that the object is in """ headers_out["user-agent"] = "obj-server %s" % os.getpid() full_path = "/%s/%s/%s" % (account, container, obj) if all([host, partition, contdevice]): try: with ConnectionTimeout(self.conn_timeout): ip, port = host.rsplit(":", 1) conn = http_connect(ip, port, contdevice, partition, op, full_path, headers_out) with Timeout(self.node_timeout): response = conn.getresponse() response.read() if is_success(response.status): return else: self.logger.error( _( "ERROR Container update failed " "(saving for async update later): %(status)d " "response from %(ip)s:%(port)s/%(dev)s" ), {"status": response.status, "ip": ip, "port": port, "dev": contdevice}, ) except (Exception, Timeout): self.logger.exception( _("ERROR container update failed with " "%(ip)s:%(port)s/%(dev)s (saving for async update later)"), {"ip": ip, "port": port, "dev": contdevice}, ) data = {"op": op, "account": account, "container": container, "obj": obj, "headers": headers_out} timestamp = headers_out["x-timestamp"] self._diskfile_mgr.pickle_async_update(objdevice, account, container, obj, data, timestamp) def container_update(self, op, account, container, obj, request, headers_out, objdevice): """ Update the container when objects are updated. :param op: operation performed (ex: 'PUT', or 'DELETE') :param account: account name for the object :param container: container name for the object :param obj: object name :param request: the original request object driving the update :param headers_out: dictionary of headers to send in the container request(s) :param objdevice: device name that the object is in """ headers_in = request.headers conthosts = [h.strip() for h in headers_in.get("X-Container-Host", "").split(",")] contdevices = [d.strip() for d in headers_in.get("X-Container-Device", "").split(",")] contpartition = headers_in.get("X-Container-Partition", "") if len(conthosts) != len(contdevices): # This shouldn't happen unless there's a bug in the proxy, # but if there is, we want to know about it. 
self.logger.error( _( "ERROR Container update failed: different " "numbers of hosts and devices in request: " '"%s" vs "%s"' ) % (headers_in.get("X-Container-Host", ""), headers_in.get("X-Container-Device", "")) ) return if contpartition: updates = zip(conthosts, contdevices) else: updates = [] headers_out["x-trans-id"] = headers_in.get("x-trans-id", "-") headers_out["referer"] = request.as_referer() for conthost, contdevice in updates: self.async_update(op, account, container, obj, conthost, contpartition, contdevice, headers_out, objdevice) def delete_at_update(self, op, delete_at, account, container, obj, request, objdevice): """ Update the expiring objects container when objects are updated. :param op: operation performed (ex: 'PUT', or 'DELETE') :param delete_at: scheduled delete in UNIX seconds, int :param account: account name for the object :param container: container name for the object :param obj: object name :param request: the original request driving the update :param objdevice: device name that the object is in """ if config_true_value(request.headers.get("x-backend-replication", "f")): return delete_at = normalize_delete_at_timestamp(delete_at) updates = [(None, None)] partition = None hosts = contdevices = [None] headers_in = request.headers headers_out = HeaderKeyDict( { "x-timestamp": headers_in["x-timestamp"], "x-trans-id": headers_in.get("x-trans-id", "-"), "referer": request.as_referer(), } ) if op != "DELETE": delete_at_container = headers_in.get("X-Delete-At-Container", None) if not delete_at_container: self.logger.warning( "X-Delete-At-Container header must be specified for " "expiring objects background %s to work properly. Making " "best guess as to the container name for now." % op ) # TODO(gholt): In a future release, change the above warning to # a raised exception and remove the guess code below. delete_at_container = ( int(delete_at) / self.expiring_objects_container_divisor * self.expiring_objects_container_divisor ) partition = headers_in.get("X-Delete-At-Partition", None) hosts = headers_in.get("X-Delete-At-Host", "") contdevices = headers_in.get("X-Delete-At-Device", "") updates = [ upd for upd in zip((h.strip() for h in hosts.split(",")), (c.strip() for c in contdevices.split(","))) if all(upd) and partition ] if not updates: updates = [(None, None)] headers_out["x-size"] = "0" headers_out["x-content-type"] = "text/plain" headers_out["x-etag"] = "d41d8cd98f00b204e9800998ecf8427e" else: # DELETEs of old expiration data have no way of knowing what the # old X-Delete-At-Container was at the time of the initial setting # of the data, so a best guess is made here. # Worst case is a DELETE is issued now for something that doesn't # exist there and the original data is left where it is, where # it will be ignored when the expirer eventually tries to issue the # object DELETE later since the X-Delete-At value won't match up. 
delete_at_container = str( int(delete_at) / self.expiring_objects_container_divisor * self.expiring_objects_container_divisor ) delete_at_container = normalize_delete_at_timestamp(delete_at_container) for host, contdevice in updates: self.async_update( op, self.expiring_objects_account, delete_at_container, "%s-%s/%s/%s" % (delete_at, account, container, obj), host, partition, contdevice, headers_out, objdevice, ) @public @timing_stats() def POST(self, request): """Handle HTTP POST requests for the Swift Object Server.""" device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True) if "x-timestamp" not in request.headers or not check_float(request.headers["x-timestamp"]): return HTTPBadRequest(body="Missing timestamp", request=request, content_type="text/plain") new_delete_at = int(request.headers.get("X-Delete-At") or 0) if new_delete_at and new_delete_at < time.time(): return HTTPBadRequest(body="X-Delete-At in past", request=request, content_type="text/plain") try: disk_file = self.get_diskfile(device, partition, account, container, obj) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: orig_metadata = disk_file.read_metadata() except (DiskFileNotExist, DiskFileQuarantined): return HTTPNotFound(request=request) orig_timestamp = orig_metadata.get("X-Timestamp", "0") if orig_timestamp >= request.headers["x-timestamp"]: return HTTPConflict(request=request) metadata = {"X-Timestamp": request.headers["x-timestamp"]} metadata.update(val for val in request.headers.iteritems() if is_user_meta("object", val[0])) for header_key in self.allowed_headers: if header_key in request.headers: header_caps = header_key.title() metadata[header_caps] = request.headers[header_key] orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0) if orig_delete_at != new_delete_at: if new_delete_at: self.delete_at_update("PUT", new_delete_at, account, container, obj, request, device) if orig_delete_at: self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device) disk_file.write_metadata(metadata) return HTTPAccepted(request=request) @public @timing_stats() def PUT(self, request): """Handle HTTP PUT requests for the Swift Object Server.""" device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True) if "x-timestamp" not in request.headers or not check_float(request.headers["x-timestamp"]): return HTTPBadRequest(body="Missing timestamp", request=request, content_type="text/plain") error_response = check_object_creation(request, obj) if error_response: return error_response new_delete_at = int(request.headers.get("X-Delete-At") or 0) if new_delete_at and new_delete_at < time.time(): return HTTPBadRequest(body="X-Delete-At in past", request=request, content_type="text/plain") try: fsize = request.message_length() except ValueError as e: return HTTPBadRequest(body=str(e), request=request, content_type="text/plain") try: disk_file = self.get_diskfile(device, partition, account, container, obj) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: orig_metadata = disk_file.read_metadata() except (DiskFileNotExist, DiskFileQuarantined): orig_metadata = {} orig_timestamp = orig_metadata.get("X-Timestamp") if orig_timestamp and orig_timestamp >= request.headers["x-timestamp"]: return HTTPConflict(request=request) orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0) upload_expiration = time.time() + self.max_upload_time etag = md5() 
elapsed_time = 0 try: with disk_file.create(size=fsize) as writer: upload_size = 0 reader = request.environ["wsgi.input"].read for chunk in iter(lambda: reader(self.network_chunk_size), ""): start_time = time.time() if start_time > upload_expiration: self.logger.increment("PUT.timeouts") return HTTPRequestTimeout(request=request) etag.update(chunk) upload_size = writer.write(chunk) elapsed_time += time.time() - start_time if upload_size: self.logger.transfer_rate("PUT." + device + ".timing", elapsed_time, upload_size) if fsize is not None and fsize != upload_size: return HTTPClientDisconnect(request=request) etag = etag.hexdigest() if "etag" in request.headers and request.headers["etag"].lower() != etag: return HTTPUnprocessableEntity(request=request) metadata = { "X-Timestamp": request.headers["x-timestamp"], "Content-Type": request.headers["content-type"], "ETag": etag, "Content-Length": str(upload_size), } metadata.update(val for val in request.headers.iteritems() if is_user_meta("object", val[0])) for header_key in (request.headers.get("X-Backend-Replication-Headers", "").split() + list(self.allowed_headers)): if header_key in request.headers: header_caps = header_key.title() metadata[header_caps] = request.headers[header_key] writer.put(metadata) except DiskFileNoSpace: return HTTPInsufficientStorage(drive=device, request=request) if orig_delete_at != new_delete_at: if new_delete_at: self.delete_at_update("PUT", new_delete_at, account, container, obj, request, device) if orig_delete_at: self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device) self.container_update( "PUT", account, container, obj, request, HeaderKeyDict( { "x-size": metadata["Content-Length"], "x-content-type": metadata["Content-Type"], "x-timestamp": metadata["X-Timestamp"], "x-etag": metadata["ETag"], } ), device, ) return HTTPCreated(request=request, etag=etag) @public @timing_stats() def GET(self, request): """Handle HTTP GET requests for the Swift Object Server.""" device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True) keep_cache = self.keep_cache_private or ( "X-Auth-Token" not in request.headers and "X-Storage-Token" not in request.headers ) try: disk_file = self.get_diskfile(device, partition, account, container, obj) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: with disk_file.open(): metadata = disk_file.get_metadata() obj_size = int(metadata["Content-Length"]) if request.headers.get("if-match") not in (None, "*") and metadata["ETag"] not in request.if_match: return HTTPPreconditionFailed(request=request) if request.headers.get("if-none-match") is not None: if metadata["ETag"] in request.if_none_match: resp = HTTPNotModified(request=request) resp.etag = metadata["ETag"] return resp file_x_ts = metadata["X-Timestamp"] file_x_ts_flt = float(file_x_ts) try: if_unmodified_since = request.if_unmodified_since except (OverflowError, ValueError): # catches timestamps before the epoch return HTTPPreconditionFailed(request=request) file_x_ts_utc = datetime.fromtimestamp(file_x_ts_flt, UTC) if if_unmodified_since and file_x_ts_utc > if_unmodified_since: return HTTPPreconditionFailed(request=request) try: if_modified_since = request.if_modified_since except (OverflowError, ValueError): # catches timestamps before the epoch return HTTPPreconditionFailed(request=request) if if_modified_since and file_x_ts_utc <= if_modified_since: return HTTPNotModified(request=request) keep_cache = self.keep_cache_private or ( 
"X-Auth-Token" not in request.headers and "X-Storage-Token" not in request.headers ) response = Response( app_iter=disk_file.reader(keep_cache=keep_cache), request=request, conditional_response=True ) response.headers["Content-Type"] = metadata.get("Content-Type", "application/octet-stream") for key, value in metadata.iteritems(): if is_user_meta("object", key) or key.lower() in self.allowed_headers: response.headers[key] = value response.etag = metadata["ETag"] response.last_modified = math.ceil(file_x_ts_flt) response.content_length = obj_size try: response.content_encoding = metadata["Content-Encoding"] except KeyError: pass response.headers["X-Timestamp"] = file_x_ts resp = request.get_response(response) except DiskFileNotExist: if request.headers.get("if-match") == "*": resp = HTTPPreconditionFailed(request=request) else: resp = HTTPNotFound(request=request) except DiskFileQuarantined: resp = HTTPNotFound(request=request) return resp @public @timing_stats(sample_rate=0.8) def HEAD(self, request): """Handle HTTP HEAD requests for the Swift Object Server.""" device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True) try: disk_file = self.get_diskfile(device, partition, account, container, obj) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: metadata = disk_file.read_metadata() except (DiskFileNotExist, DiskFileQuarantined): return HTTPNotFound(request=request) response = Response(request=request, conditional_response=True) response.headers["Content-Type"] = metadata.get("Content-Type", "application/octet-stream") for key, value in metadata.iteritems(): if is_user_meta("object", key) or key.lower() in self.allowed_headers: response.headers[key] = value response.etag = metadata["ETag"] ts = metadata["X-Timestamp"] response.last_modified = math.ceil(float(ts)) # Needed for container sync feature response.headers["X-Timestamp"] = ts response.content_length = int(metadata["Content-Length"]) try: response.content_encoding = metadata["Content-Encoding"] except KeyError: pass return response @public @timing_stats() def DELETE(self, request): """Handle HTTP DELETE requests for the Swift Object Server.""" device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True) if "x-timestamp" not in request.headers or not check_float(request.headers["x-timestamp"]): return HTTPBadRequest(body="Missing timestamp", request=request, content_type="text/plain") try: disk_file = self.get_diskfile(device, partition, account, container, obj) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: orig_metadata = disk_file.read_metadata() except DiskFileExpired as e: orig_timestamp = e.timestamp orig_metadata = e.metadata response_class = HTTPNotFound except DiskFileDeleted as e: orig_timestamp = e.timestamp orig_metadata = {} response_class = HTTPNotFound except (DiskFileNotExist, DiskFileQuarantined): orig_timestamp = 0 orig_metadata = {} response_class = HTTPNotFound else: orig_timestamp = orig_metadata.get("X-Timestamp", 0) if orig_timestamp < request.headers["x-timestamp"]: response_class = HTTPNoContent else: response_class = HTTPConflict orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0) try: req_if_delete_at_val = request.headers["x-if-delete-at"] req_if_delete_at = int(req_if_delete_at_val) except KeyError: pass except ValueError: return HTTPBadRequest(request=request, body="Bad X-If-Delete-At header value") else: if orig_delete_at != 
req_if_delete_at: return HTTPPreconditionFailed(request=request, body="X-If-Delete-At and X-Delete-At do not match") if orig_delete_at: self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device) req_timestamp = request.headers["X-Timestamp"] if orig_timestamp < req_timestamp: disk_file.delete(req_timestamp) self.container_update( "DELETE", account, container, obj, request, HeaderKeyDict({"x-timestamp": req_timestamp}), device ) return response_class(request=request) @public @replication @timing_stats(sample_rate=0.1) def REPLICATE(self, request): """ Handle REPLICATE requests for the Swift Object Server. This is used by the object replicator to get hashes for directories. """ device, partition, suffix = split_and_validate_path(request, 2, 3, True) try: hashes = self._diskfile_mgr.get_hashes(device, partition, suffix) except DiskFileDeviceUnavailable: resp = HTTPInsufficientStorage(drive=device, request=request) else: resp = Response(body=pickle.dumps(hashes)) return resp @public @replication @timing_stats(sample_rate=0.1) def REPLICATION(self, request): return Response(app_iter=ssync_receiver.Receiver(self, request)()) def __call__(self, env, start_response): """WSGI Application entry point for the Swift Object Server.""" start_time = time.time() req = Request(env) self.logger.txn_id = req.headers.get("x-trans-id", None) if not check_utf8(req.path_info): res = HTTPPreconditionFailed(body="Invalid UTF8 or contains NULL") else: try: # disallow methods which have not been marked 'public' try: method = getattr(self, req.method) getattr(method, "publicly_accessible") replication_method = getattr(method, "replication", False) if self.replication_server is not None and self.replication_server != replication_method: raise AttributeError("Not allowed method.") except AttributeError: res = HTTPMethodNotAllowed() else: res = method(req) except DiskFileCollision: res = HTTPForbidden(request=req) except HTTPException as error_response: res = error_response except (Exception, Timeout): self.logger.exception( _("ERROR __call__ error with %(method)s" " %(path)s "), {"method": req.method, "path": req.path} ) res = HTTPInternalServerError(body=traceback.format_exc()) trans_time = time.time() - start_time if self.log_requests: log_line = '%s - - [%s] "%s %s" %s %s "%s" "%s" "%s" %.4f' % ( req.remote_addr, time.strftime("%d/%b/%Y:%H:%M:%S +0000", time.gmtime()), req.method, req.path, res.status.split()[0], res.content_length or "-", req.referer or "-", req.headers.get("x-trans-id", "-"), req.user_agent or "-", trans_time, ) if req.method in ("REPLICATE", "REPLICATION") or "X-Backend-Replication" in req.headers: self.logger.debug(log_line) else: self.logger.info(log_line) if req.method in ("PUT", "DELETE"): slow = self.slow - trans_time if slow > 0: sleep(slow) return res(env, start_response)
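# --- Illustrative sketch (not part of the server code above or below) ---
# The delete_at_update() implementations in this file all derive the
# expiring-objects container name by rounding the X-Delete-At timestamp down
# to a multiple of expiring_objects_container_divisor (default 86400), so
# every object expiring in the same window lands in the same container under
# the ".expiring_objects" account. The helper below is a hypothetical
# stand-in for that arithmetic; upstream Swift performs it in
# swift.common.utils.get_expirer_container and zero-pads the result via
# normalize_delete_at_timestamp().

def example_expirer_container(delete_at, divisor=86400):
    """Round a UNIX timestamp down to its expirer-container bucket name."""
    # '%010d' mirrors the ten-digit zero-padding of normalized timestamps.
    return '%010d' % (int(delete_at) // divisor * divisor)

# e.g. example_expirer_container(1400000123) -> '1399939200'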
class ObjectController(BaseStorageServer): """Implements the WSGI application for the Swift Object Server.""" server_type = 'object-server' def __init__(self, conf, logger=None): """ Creates a new WSGI application for the Swift Object Server. An example configuration is given at <source-dir>/etc/object-server.conf-sample or /etc/swift/object-server.conf-sample. """ super(ObjectController, self).__init__(conf) self.logger = logger or get_logger(conf, log_route='object-server') self.node_timeout = int(conf.get('node_timeout', 3)) self.conn_timeout = float(conf.get('conn_timeout', 0.5)) self.client_timeout = int(conf.get('client_timeout', 60)) self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536)) self.network_chunk_size = int(conf.get('network_chunk_size', 65536)) self.log_requests = config_true_value(conf.get('log_requests', 'true')) self.max_upload_time = int(conf.get('max_upload_time', 86400)) self.slow = int(conf.get('slow', 0)) self.keep_cache_private = \ config_true_value(conf.get('keep_cache_private', 'false')) default_allowed_headers = ''' content-disposition, content-encoding, x-delete-at, x-object-manifest, x-static-large-object, ''' extra_allowed_headers = [ header.strip().lower() for header in conf.get( 'allowed_headers', default_allowed_headers).split(',') if header.strip() ] self.allowed_headers = set() for header in extra_allowed_headers: if header not in DATAFILE_SYSTEM_META: self.allowed_headers.add(header) self.auto_create_account_prefix = \ conf.get('auto_create_account_prefix') or '.' self.expiring_objects_account = self.auto_create_account_prefix + \ (conf.get('expiring_objects_account_name') or 'expiring_objects') self.expiring_objects_container_divisor = \ int(conf.get('expiring_objects_container_divisor') or 86400) # Initialization was successful, so now apply the network chunk size # parameter as the default read / write buffer size for the network # sockets. # # NOTE WELL: This is a class setting, so until we can set this on a # per-connection basis, this affects reading and writing on ALL # sockets, those between the proxy servers and external clients, and # those between the proxy servers and the other internal servers. # # ** Because the primary motivation for this is to optimize how data # is written back to the proxy server, we could use the value from the # disk_chunk_size parameter. However, it affects all created sockets # using this class so we have chosen to tie it to the # network_chunk_size parameter value instead. socket._fileobject.default_bufsize = self.network_chunk_size # Provide further setup specific to an object server implementation. self.setup(conf) def setup(self, conf): """ Implementation specific setup. This method is called at the very end by the constructor to allow a specific implementation to modify existing attributes or add its own attributes. :param conf: WSGI configuration parameter """ # Common on-disk hierarchy shared across account, container and object # servers. self._diskfile_mgr = DiskFileManager(conf, self.logger) # This is populated by global_conf_callback way below as the semaphore # is shared by all workers.
if 'replication_semaphore' in conf: # The value was put in a list so it could get past paste self.replication_semaphore = conf['replication_semaphore'][0] else: self.replication_semaphore = None self.replication_failure_threshold = int( conf.get('replication_failure_threshold') or 100) self.replication_failure_ratio = float( conf.get('replication_failure_ratio') or 1.0) def get_diskfile(self, device, partition, account, container, obj, policy_idx, **kwargs): """ Utility method for instantiating a DiskFile object supporting a given REST API. An implementation of the object server that wants to use a different DiskFile class would simply over-ride this method to provide that behavior. """ return self._diskfile_mgr.get_diskfile( device, partition, account, container, obj, policy_idx, **kwargs) def async_update(self, op, account, container, obj, host, partition, contdevice, headers_out, objdevice, policy_index): """ Sends or saves an async update. :param op: operation performed (ex: 'PUT', or 'DELETE') :param account: account name for the object :param container: container name for the object :param obj: object name :param host: host that the container is on :param partition: partition that the container is on :param contdevice: device name that the container is on :param headers_out: dictionary of headers to send in the container request :param objdevice: device name that the object is in :param policy_index: the associated storage policy index """ headers_out['user-agent'] = 'object-server %s' % os.getpid() full_path = '/%s/%s/%s' % (account, container, obj) if all([host, partition, contdevice]): try: with ConnectionTimeout(self.conn_timeout): ip, port = host.rsplit(':', 1) conn = http_connect(ip, port, contdevice, partition, op, full_path, headers_out) with Timeout(self.node_timeout): response = conn.getresponse() response.read() if is_success(response.status): return else: self.logger.error(_( 'ERROR Container update failed ' '(saving for async update later): %(status)d ' 'response from %(ip)s:%(port)s/%(dev)s'), {'status': response.status, 'ip': ip, 'port': port, 'dev': contdevice}) except (Exception, Timeout): self.logger.exception(_( 'ERROR container update failed with ' '%(ip)s:%(port)s/%(dev)s (saving for async update later)'), {'ip': ip, 'port': port, 'dev': contdevice}) data = {'op': op, 'account': account, 'container': container, 'obj': obj, 'headers': headers_out} timestamp = headers_out['x-timestamp'] self._diskfile_mgr.pickle_async_update(objdevice, account, container, obj, data, timestamp, policy_index) def container_update(self, op, account, container, obj, request, headers_out, objdevice, policy_idx): """ Update the container when objects are updated. :param op: operation performed (ex: 'PUT', or 'DELETE') :param account: account name for the object :param container: container name for the object :param obj: object name :param request: the original request object driving the update :param headers_out: dictionary of headers to send in the container request(s) :param objdevice: device name that the object is in """ headers_in = request.headers conthosts = [h.strip() for h in headers_in.get('X-Container-Host', '').split(',')] contdevices = [d.strip() for d in headers_in.get('X-Container-Device', '').split(',')] contpartition = headers_in.get('X-Container-Partition', '') if len(conthosts) != len(contdevices): # This shouldn't happen unless there's a bug in the proxy, # but if there is, we want to know about it. 
self.logger.error(_('ERROR Container update failed: different ' 'numbers of hosts and devices in request: ' '"%s" vs "%s"') % (headers_in.get('X-Container-Host', ''), headers_in.get('X-Container-Device', ''))) return if contpartition: updates = zip(conthosts, contdevices) else: updates = [] headers_out['x-trans-id'] = headers_in.get('x-trans-id', '-') headers_out['referer'] = request.as_referer() headers_out['X-Backend-Storage-Policy-Index'] = policy_idx for conthost, contdevice in updates: self.async_update(op, account, container, obj, conthost, contpartition, contdevice, headers_out, objdevice, policy_idx) def delete_at_update(self, op, delete_at, account, container, obj, request, objdevice, policy_index): """ Update the expiring objects container when objects are updated. :param op: operation performed (ex: 'PUT', or 'DELETE') :param delete_at: scheduled delete in UNIX seconds, int :param account: account name for the object :param container: container name for the object :param obj: object name :param request: the original request driving the update :param objdevice: device name that the object is in :param policy_index: the policy index to be used for tmp dir """ if config_true_value( request.headers.get('x-backend-replication', 'f')): return delete_at = normalize_delete_at_timestamp(delete_at) updates = [(None, None)] partition = None hosts = contdevices = [None] headers_in = request.headers headers_out = HeaderKeyDict({ # system accounts are always Policy-0 'X-Backend-Storage-Policy-Index': 0, 'x-timestamp': request.timestamp.internal, 'x-trans-id': headers_in.get('x-trans-id', '-'), 'referer': request.as_referer()}) if op != 'DELETE': delete_at_container = headers_in.get('X-Delete-At-Container', None) if not delete_at_container: self.logger.warning( 'X-Delete-At-Container header must be specified for ' 'expiring objects background %s to work properly. Making ' 'best guess as to the container name for now.' % op) # TODO(gholt): In a future release, change the above warning to # a raised exception and remove the guess code below. delete_at_container = get_expirer_container( delete_at, self.expiring_objects_container_divisor, account, container, obj) partition = headers_in.get('X-Delete-At-Partition', None) hosts = headers_in.get('X-Delete-At-Host', '') contdevices = headers_in.get('X-Delete-At-Device', '') updates = [upd for upd in zip((h.strip() for h in hosts.split(',')), (c.strip() for c in contdevices.split(','))) if all(upd) and partition] if not updates: updates = [(None, None)] headers_out['x-size'] = '0' headers_out['x-content-type'] = 'text/plain' headers_out['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e' else: # DELETEs of old expiration data have no way of knowing what the # old X-Delete-At-Container was at the time of the initial setting # of the data, so a best guess is made here. # Worst case is a DELETE is issued now for something that doesn't # exist there and the original data is left where it is, where # it will be ignored when the expirer eventually tries to issue the # object DELETE later since the X-Delete-At value won't match up. 
delete_at_container = get_expirer_container( delete_at, self.expiring_objects_container_divisor, account, container, obj) delete_at_container = normalize_delete_at_timestamp( delete_at_container) for host, contdevice in updates: self.async_update( op, self.expiring_objects_account, delete_at_container, '%s-%s/%s/%s' % (delete_at, account, container, obj), host, partition, contdevice, headers_out, objdevice, policy_index)
@public @timing_stats() def POST(self, request): """Handle HTTP POST requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = \ get_name_and_placement(request, 5, 5, True) req_timestamp = valid_timestamp(request) new_delete_at = int(request.headers.get('X-Delete-At') or 0) if new_delete_at and new_delete_at < time.time(): return HTTPBadRequest(body='X-Delete-At in past', request=request, content_type='text/plain') try: disk_file = self.get_diskfile( device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: orig_metadata = disk_file.read_metadata() except DiskFileXattrNotSupported: return HTTPInsufficientStorage(drive=device, request=request) except (DiskFileNotExist, DiskFileQuarantined): return HTTPNotFound(request=request) orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0)) if orig_timestamp >= req_timestamp: return HTTPConflict( request=request, headers={'X-Backend-Timestamp': orig_timestamp.internal}) metadata = {'X-Timestamp': req_timestamp.internal} metadata.update(val for val in request.headers.iteritems() if is_user_meta('object', val[0])) for header_key in self.allowed_headers: if header_key in request.headers: header_caps = header_key.title() metadata[header_caps] = request.headers[header_key] orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0) if orig_delete_at != new_delete_at: if new_delete_at: self.delete_at_update('PUT', new_delete_at, account, container, obj, request, device, policy_idx) if orig_delete_at: self.delete_at_update('DELETE', orig_delete_at, account, container, obj, request, device, policy_idx) try: disk_file.write_metadata(metadata) except (DiskFileXattrNotSupported, DiskFileNoSpace): return HTTPInsufficientStorage(drive=device, request=request) return HTTPAccepted(request=request)
@public @timing_stats() def PUT(self, request): """Handle HTTP PUT requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = \ get_name_and_placement(request, 5, 5, True) req_timestamp = valid_timestamp(request) error_response = check_object_creation(request, obj) if error_response: return error_response new_delete_at = int(request.headers.get('X-Delete-At') or 0) if new_delete_at and new_delete_at < time.time(): return HTTPBadRequest(body='X-Delete-At in past', request=request, content_type='text/plain') try: fsize = request.message_length() except ValueError as e: return HTTPBadRequest(body=str(e), request=request, content_type='text/plain') try: disk_file = self.get_diskfile( device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: orig_metadata = disk_file.read_metadata() except DiskFileXattrNotSupported: return HTTPInsufficientStorage(drive=device, request=request) except (DiskFileNotExist, DiskFileQuarantined): orig_metadata = {} # Checks for If-None-Match if request.if_none_match is not None and orig_metadata: if '*' in request.if_none_match: # File exists already so return 412 return HTTPPreconditionFailed(request=request) if orig_metadata.get('ETag') in request.if_none_match: # The current ETag matches, so return 412 return HTTPPreconditionFailed(request=request) orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0)) if orig_timestamp >= req_timestamp: return HTTPConflict( request=request, headers={'X-Backend-Timestamp': orig_timestamp.internal}) orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0) upload_expiration = time.time() + self.max_upload_time etag = md5() elapsed_time = 0 try: with disk_file.create(size=fsize) as writer: upload_size = 0 def timeout_reader(): with ChunkReadTimeout(self.client_timeout): return request.environ['wsgi.input'].read( self.network_chunk_size) try: for chunk in iter(lambda: timeout_reader(), ''): start_time = time.time() if start_time > upload_expiration: self.logger.increment('PUT.timeouts') return HTTPRequestTimeout(request=request) etag.update(chunk) upload_size = writer.write(chunk) elapsed_time += time.time() - start_time except ChunkReadTimeout: return HTTPRequestTimeout(request=request) if upload_size: self.logger.transfer_rate( 'PUT.' + device + '.timing', elapsed_time, upload_size) if fsize is not None and fsize != upload_size: return HTTPClientDisconnect(request=request) etag = etag.hexdigest() if 'etag' in request.headers and \ request.headers['etag'].lower() != etag: return HTTPUnprocessableEntity(request=request) metadata = { 'X-Timestamp': request.timestamp.internal, 'Content-Type': request.headers['content-type'], 'ETag': etag, 'Content-Length': str(upload_size), } metadata.update(val for val in request.headers.iteritems() if is_sys_or_user_meta('object', val[0])) headers_to_copy = ( request.headers.get( 'X-Backend-Replication-Headers', '').split() + list(self.allowed_headers)) for header_key in headers_to_copy: if header_key in request.headers: header_caps = header_key.title() metadata[header_caps] = request.headers[header_key] writer.put(metadata) except (DiskFileXattrNotSupported, DiskFileNoSpace): return HTTPInsufficientStorage(drive=device, request=request) if orig_delete_at != new_delete_at: if new_delete_at: self.delete_at_update( 'PUT', new_delete_at, account, container, obj, request, device, policy_idx) if orig_delete_at: self.delete_at_update( 'DELETE', orig_delete_at, account, container, obj, request, device, policy_idx) self.container_update( 'PUT', account, container, obj, request, HeaderKeyDict({ 'x-size': metadata['Content-Length'], 'x-content-type': metadata['Content-Type'], 'x-timestamp': metadata['X-Timestamp'], 'x-etag': metadata['ETag']}), device, policy_idx) return HTTPCreated(request=request, etag=etag)
@public @timing_stats() def GET(self, request): """Handle HTTP GET requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = \ get_name_and_placement(request, 5, 5, True) keep_cache = self.keep_cache_private or ( 'X-Auth-Token' not in request.headers and 'X-Storage-Token' not in request.headers) try: disk_file = self.get_diskfile( device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: with disk_file.open(): metadata = disk_file.get_metadata() obj_size = int(metadata['Content-Length']) file_x_ts = Timestamp(metadata['X-Timestamp']) keep_cache = (self.keep_cache_private or ('X-Auth-Token' not in request.headers and 'X-Storage-Token' not in request.headers)) response = Response( app_iter=disk_file.reader(keep_cache=keep_cache), request=request, conditional_response=True) response.headers['Content-Type'] = metadata.get( 'Content-Type', 'application/octet-stream') for key, value in metadata.iteritems(): if is_sys_or_user_meta('object', key) or \ key.lower() in self.allowed_headers: response.headers[key] = value response.etag = metadata['ETag'] response.last_modified = math.ceil(float(file_x_ts)) response.content_length = obj_size try: response.content_encoding = metadata[ 'Content-Encoding'] except KeyError: pass response.headers['X-Timestamp'] = file_x_ts.normal response.headers['X-Backend-Timestamp'] = file_x_ts.internal resp = request.get_response(response) except DiskFileXattrNotSupported: return HTTPInsufficientStorage(drive=device, request=request) except (DiskFileNotExist, DiskFileQuarantined) as e: headers = {} if hasattr(e, 'timestamp'): headers['X-Backend-Timestamp'] = e.timestamp.internal resp = HTTPNotFound(request=request, headers=headers, conditional_response=True) return resp
@public @timing_stats(sample_rate=0.8) def HEAD(self, request): """Handle HTTP HEAD requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = \ get_name_and_placement(request, 5, 5, True) try: disk_file = self.get_diskfile( device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: metadata = disk_file.read_metadata() except DiskFileXattrNotSupported: return HTTPInsufficientStorage(drive=device, request=request) except (DiskFileNotExist, DiskFileQuarantined) as e: headers = {} if hasattr(e, 'timestamp'): headers['X-Backend-Timestamp'] = e.timestamp.internal return HTTPNotFound(request=request, headers=headers, conditional_response=True) response = Response(request=request, conditional_response=True) response.headers['Content-Type'] = metadata.get( 'Content-Type', 'application/octet-stream') for key, value in metadata.iteritems(): if is_sys_or_user_meta('object', key) or \ key.lower() in self.allowed_headers: response.headers[key] = value response.etag = metadata['ETag'] ts = Timestamp(metadata['X-Timestamp']) response.last_modified = math.ceil(float(ts)) # Needed for container sync feature response.headers['X-Timestamp'] = ts.normal response.headers['X-Backend-Timestamp'] = ts.internal response.content_length = int(metadata['Content-Length']) try: response.content_encoding = metadata['Content-Encoding'] except KeyError: pass return response
@public @timing_stats() def DELETE(self, request): """Handle HTTP DELETE requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = \
get_name_and_placement(request, 5, 5, True) req_timestamp = valid_timestamp(request) try: disk_file = self.get_diskfile( device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: orig_metadata = disk_file.read_metadata() except DiskFileXattrNotSupported: return HTTPInsufficientStorage(drive=device, request=request) except DiskFileExpired as e: orig_timestamp = e.timestamp orig_metadata = e.metadata response_class = HTTPNotFound except DiskFileDeleted as e: orig_timestamp = e.timestamp orig_metadata = {} response_class = HTTPNotFound except (DiskFileNotExist, DiskFileQuarantined): orig_timestamp = 0 orig_metadata = {} response_class = HTTPNotFound else: orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0)) if orig_timestamp < req_timestamp: response_class = HTTPNoContent else: response_class = HTTPConflict response_timestamp = max(orig_timestamp, req_timestamp) orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0) try: req_if_delete_at_val = request.headers['x-if-delete-at'] req_if_delete_at = int(req_if_delete_at_val) except KeyError: pass except ValueError: return HTTPBadRequest( request=request, body='Bad X-If-Delete-At header value') else: # request includes x-if-delete-at; we must not place a tombstone # if we can not verify the x-if-delete-at time if not orig_timestamp: # no object found at all return HTTPNotFound() if orig_delete_at != req_if_delete_at: return HTTPPreconditionFailed( request=request, body='X-If-Delete-At and X-Delete-At do not match') else: # differentiate success from no object at all response_class = HTTPNoContent if orig_delete_at: self.delete_at_update('DELETE', orig_delete_at, account, container, obj, request, device, policy_idx) if orig_timestamp < req_timestamp: disk_file.delete(req_timestamp) self.container_update( 'DELETE', account, container, obj, request, HeaderKeyDict({'x-timestamp': req_timestamp.internal}), device, policy_idx) return response_class( request=request, headers={'X-Backend-Timestamp': response_timestamp.internal}) @public @replication @timing_stats(sample_rate=0.1) def REPLICATE(self, request): """ Handle REPLICATE requests for the Swift Object Server. This is used by the object replicator to get hashes for directories. 
""" device, partition, suffix, policy_idx = \ get_name_and_placement(request, 2, 3, True) try: hashes = self._diskfile_mgr.get_hashes(device, partition, suffix, policy_idx) except DiskFileDeviceUnavailable: resp = HTTPInsufficientStorage(drive=device, request=request) else: resp = Response(body=pickle.dumps(hashes)) return resp @public @replication @timing_stats(sample_rate=0.1) def REPLICATION(self, request): return Response(app_iter=ssync_receiver.Receiver(self, request)()) def __call__(self, env, start_response): """WSGI Application entry point for the Swift Object Server.""" with open("/home/ubuntu/spawn.txt", "a") as tran_file: tran_file.write("At Start Datetime = "+str(datetime.now())+"\n") start_time = time.time() req = Request(env) self.logger.txn_id = req.headers.get('x-trans-id', None) if not check_utf8(req.path_info): res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL') else: try: # disallow methods which have not been marked 'public' try: if req.method not in self.allowed_methods: raise AttributeError('Not allowed method.') except AttributeError: res = HTTPMethodNotAllowed() else: method = getattr(self, req.method) res = method(req) except DiskFileCollision: res = HTTPForbidden(request=req) except HTTPException as error_response: res = error_response except (Exception, Timeout): self.logger.exception(_( 'ERROR __call__ error with %(method)s' ' %(path)s '), {'method': req.method, 'path': req.path}) res = HTTPInternalServerError(body=traceback.format_exc()) trans_time = time.time() - start_time if self.log_requests: log_line = get_log_line(req, res, trans_time, '') if req.method in ('REPLICATE', 'REPLICATION') or \ 'X-Backend-Replication' in req.headers: self.logger.debug(log_line) else: self.logger.info(log_line) if req.method in ('PUT', 'DELETE'): slow = self.slow - trans_time with open("/home/ubuntu/spawn.txt", "a") as tran_file: tran_file.write("Before slow check Datetime = "+str(datetime.now())+" Slow="+str(slow)+"\n") if slow > 0: sleep(slow) # To be able to zero-copy send the object, we need a few things. # First, we have to be responding successfully to a GET, or else we're # not sending the object. Second, we have to be able to extract the # socket file descriptor from the WSGI input object. Third, the # diskfile has to support zero-copy send. # # There's a good chance that this could work for 206 responses too, # but the common case is sending the whole object, so we'll start # there. if req.method == 'GET' and res.status_int == 200 and \ isinstance(env['wsgi.input'], wsgi.Input): app_iter = getattr(res, 'app_iter', None) checker = getattr(app_iter, 'can_zero_copy_send', None) if checker and checker(): # For any kind of zero-copy thing like sendfile or splice, we # need the file descriptor. Eventlet doesn't provide a clean # way of getting that, so we resort to this. wsock = env['wsgi.input'].get_socket() wsockfd = wsock.fileno() # Don't call zero_copy_send() until after we force the HTTP # headers out of Eventlet and into the socket. def zero_copy_iter(): # If possible, set TCP_CORK so that headers don't # immediately go on the wire, but instead, wait for some # response body to make the TCP frames as large as # possible (and hence as few packets as possible). # # On non-Linux systems, we might consider TCP_NODELAY, but # since the only known zero-copy-capable diskfile uses # Linux-specific syscalls, we'll defer that work until # someone needs it. 
if hasattr(socket, 'TCP_CORK'): wsock.setsockopt(socket.IPPROTO_TCP, socket.TCP_CORK, 1) yield EventletPlungerString() try: app_iter.zero_copy_send(wsockfd) except Exception: self.logger.exception("zero_copy_send() blew up") raise yield '' # Get headers ready to go out res(env, start_response) return zero_copy_iter() else: return res(env, start_response) else: return res(env, start_response)
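# --- Illustrative sketch (assumptions noted; not part of the class above) ---
# The zero-copy path above corks the socket, forces the HTTP headers out, and
# then lets the diskfile splice object data straight to the socket. Stripped
# of the Eventlet/WSGI plumbing, the same cork-then-send idea looks roughly
# like the snippet below. It assumes Linux (TCP_CORK) and Python 3's
# os.sendfile; the function and argument names are hypothetical.
import os
import socket

def cork_and_sendfile(sock, headers, fp, length):
    # Hold the small header frames back so they ride out with body data.
    if hasattr(socket, 'TCP_CORK'):
        sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_CORK, 1)
    sock.sendall(headers)
    offset = 0
    while offset < length:
        # sendfile() copies kernel-to-kernel, never touching userspace.
        sent = os.sendfile(sock.fileno(), fp.fileno(), offset,
                           length - offset)
        if not sent:
            break
        offset += sent
    if hasattr(socket, 'TCP_CORK'):
        # Uncorking flushes anything still buffered in the kernel.
        sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_CORK, 0)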
class TruncateMiddleware(object): def __init__(self, app, conf, *args, **kwargs): self.app = app self.conf = conf self.logger = get_logger(self.conf, log_route='truncate') self._diskfile_mgr = DiskFileManager(conf, self.logger)
def truncate(self, env): req = Request(env) try: disk_file = self.get_diskfile(self.device, self.partition, self.account, self.container, self.obj) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=self.device, request=Request(copy(env)))
# In the object flow, deleting the temporary data and then calling put() on
# the DiskFileWriter invokes _finalize_put(), which names the object file
# after the X-Timestamp value in the metadata and swaps the temporary file
# into place. So no separate truncate is needed here.
ori_meta = disk_file.read_metadata() metadata = { 'X-Timestamp': ori_meta['X-Timestamp'], 'Content-Type': ori_meta['Content-Type'], 'ETag': 'd41d8cd98f00b204e9800998ecf8427e', 'Content-Length': 0, 'X-Object-Meta-Glacier': True, 'X-Object-Meta-S3-Content-Length': ori_meta['Content-Length'], 'X-Object-Meta-S3-ETag': ori_meta['ETag'] }
# Preserve the original object metadata as well.
metadata.update(val for val in ori_meta.iteritems() if is_user_meta('object', val[0]))
# If object-restore information is present, remove it; this is the
# restored-object expiration case.
if 'X-Object-Meta-S3-Restore' in metadata: del metadata['X-Object-Meta-S3-Restore'] with disk_file.create(size=0) as writer: writer.put(metadata) return HTTPCreated(request=req, etag=ori_meta['ETag'])
def get_diskfile(self, device, partition, account, container, obj, **kwargs): return self._diskfile_mgr.get_diskfile(device, partition, account, container, obj, **kwargs)
def __call__(self, env, start_response): req = Request(copy(env)) method = req.method if (method == 'PUT' or method == 'POST') and \ GLACIER_FLAG_META in req.headers: self.device, self.partition, self.account, self.container, \ self.obj = split_and_validate_path(req, 5, 5, True) return self.truncate(env)(env, start_response) return self.app(env, start_response)
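# --- Illustrative sketch (hypothetical wiring; not present in the source) ---
# TruncateMiddleware follows the usual Swift/paste middleware shape, so a
# deployment would need a filter factory like the one below plus a
# [filter:truncate] section in the server's paste pipeline config. The
# factory and inner function names are assumptions, not taken from the code
# above.
def filter_factory(global_conf, **local_conf):
    conf = global_conf.copy()
    conf.update(local_conf)

    def truncate_filter(app):
        return TruncateMiddleware(app, conf)
    return truncate_filter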
class TestAuditor(unittest.TestCase): def setUp(self): self.testdir = os.path.join(mkdtemp(), "tmp_test_object_auditor") self.devices = os.path.join(self.testdir, "node") self.rcache = os.path.join(self.testdir, "object.recon") self.logger = FakeLogger() rmtree(self.testdir, ignore_errors=1) mkdirs(os.path.join(self.devices, "sda")) os.mkdir(os.path.join(self.devices, "sdb")) # policy 0 self.objects = os.path.join(self.devices, "sda", get_data_dir(POLICIES[0])) self.objects_2 = os.path.join(self.devices, "sdb", get_data_dir(POLICIES[0])) os.mkdir(self.objects) # policy 1 self.objects_p1 = os.path.join(self.devices, "sda", get_data_dir(POLICIES[1])) self.objects_2_p1 = os.path.join(self.devices, "sdb", get_data_dir(POLICIES[1])) os.mkdir(self.objects_p1) self.parts = self.parts_p1 = {} for part in ["0", "1", "2", "3"]: self.parts[part] = os.path.join(self.objects, part) self.parts_p1[part] = os.path.join(self.objects_p1, part) os.mkdir(os.path.join(self.objects, part)) os.mkdir(os.path.join(self.objects_p1, part)) self.conf = dict(devices=self.devices, mount_check="false", object_size_stats="10,100,1024,10240") self.df_mgr = DiskFileManager(self.conf, self.logger) # diskfiles for policy 0, 1 self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o", policy=POLICIES[0]) self.disk_file_p1 = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o", policy=POLICIES[1]) def tearDown(self): rmtree(os.path.dirname(self.testdir), ignore_errors=1) unit.xattr_data = {} def test_worker_conf_parms(self): def check_common_defaults(): self.assertEquals(auditor_worker.max_bytes_per_second, 10000000) self.assertEquals(auditor_worker.log_time, 3600) # test default values conf = dict(devices=self.devices, mount_check="false", object_size_stats="10,100,1024,10240") auditor_worker = auditor.AuditorWorker(conf, self.logger, self.rcache, self.devices) check_common_defaults() self.assertEquals(auditor_worker.diskfile_mgr.disk_chunk_size, 65536) self.assertEquals(auditor_worker.max_files_per_second, 20) self.assertEquals(auditor_worker.zero_byte_only_at_fps, 0) # test specified audit value overrides conf.update({"disk_chunk_size": 4096}) auditor_worker = auditor.AuditorWorker(conf, self.logger, self.rcache, self.devices, zero_byte_only_at_fps=50) check_common_defaults() self.assertEquals(auditor_worker.diskfile_mgr.disk_chunk_size, 4096) self.assertEquals(auditor_worker.max_files_per_second, 50) self.assertEquals(auditor_worker.zero_byte_only_at_fps, 50) def test_object_audit_extra_data(self): def run_tests(disk_file): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) data = "0" * 1024 etag = md5() with disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() timestamp = str(normalize_timestamp(time.time())) metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)} writer.put(metadata) pre_quarantines = auditor_worker.quarantines auditor_worker.object_audit(AuditLocation(disk_file._datadir, "sda", "0", policy=POLICIES.legacy)) self.assertEquals(auditor_worker.quarantines, pre_quarantines) os.write(writer._fd, "extra_data") auditor_worker.object_audit(AuditLocation(disk_file._datadir, "sda", "0", policy=POLICIES.legacy)) self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) run_tests(self.disk_file) run_tests(self.disk_file_p1) def test_object_audit_diff_data(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) data = "0" * 1024 etag = 
md5() timestamp = str(normalize_timestamp(time.time())) with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)} writer.put(metadata) pre_quarantines = auditor_worker.quarantines # remake so it will have metadata self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o", policy=POLICIES.legacy) auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0", policy=POLICIES.legacy)) self.assertEquals(auditor_worker.quarantines, pre_quarantines) etag = md5() etag.update("1" + "0" * 1023) etag = etag.hexdigest() metadata["ETag"] = etag with self.disk_file.create() as writer: writer.write(data) writer.put(metadata) auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0", policy=POLICIES.legacy)) self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_no_meta(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + ".data") mkdirs(self.disk_file._datadir) fp = open(path, "w") fp.write("0" * 1024) fp.close() invalidate_hash(os.path.dirname(self.disk_file._datadir)) auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) pre_quarantines = auditor_worker.quarantines auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0", policy=POLICIES.legacy)) self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_will_not_swallow_errors_in_tests(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + ".data") mkdirs(self.disk_file._datadir) with open(path, "w") as f: write_metadata(f, {"name": "/a/c/o"}) auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) def blowup(*args): raise NameError("tpyo") with mock.patch.object(DiskFileManager, "get_diskfile_from_audit_location", blowup): self.assertRaises( NameError, auditor_worker.object_audit, AuditLocation(os.path.dirname(path), "sda", "0", policy=POLICIES.legacy), ) def test_failsafe_object_audit_will_swallow_errors_in_tests(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + ".data") mkdirs(self.disk_file._datadir) with open(path, "w") as f: write_metadata(f, {"name": "/a/c/o"}) auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) def blowup(*args): raise NameError("tpyo") with mock.patch("swift.obj.diskfile.DiskFileManager.diskfile_cls", blowup): auditor_worker.failsafe_object_audit( AuditLocation(os.path.dirname(path), "sda", "0", policy=POLICIES.legacy) ) self.assertEquals(auditor_worker.errors, 1) def test_generic_exception_handling(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) # pretend that we logged (and reset counters) just now auditor_worker.last_logged = time.time() timestamp = str(normalize_timestamp(time.time())) pre_errors = auditor_worker.errors data = "0" * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)} writer.put(metadata) with mock.patch("swift.obj.diskfile.DiskFileManager.diskfile_cls", lambda *_: 1 / 0): auditor_worker.audit_all_objects() 
self.assertEquals(auditor_worker.errors, pre_errors + 1) def test_object_run_once_pass(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) auditor_worker.log_time = 0 timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines data = "0" * 1024 def write_file(df): etag = md5() with df.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)} writer.put(metadata) # policy 0 write_file(self.disk_file) # policy 1 write_file(self.disk_file_p1) auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.quarantines, pre_quarantines) # 1 object per policy falls into 1024 bucket self.assertEquals(auditor_worker.stats_buckets[1024], 2) self.assertEquals(auditor_worker.stats_buckets[10240], 0) # pick up some additional code coverage, large file data = "0" * 1024 * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)} writer.put(metadata) auditor_worker.audit_all_objects(device_dirs=["sda", "sdb"]) self.assertEquals(auditor_worker.quarantines, pre_quarantines) # still have the 1024 byte object left in policy-1 (plus the # stats from the original 2) self.assertEquals(auditor_worker.stats_buckets[1024], 3) self.assertEquals(auditor_worker.stats_buckets[10240], 0) # and then policy-0 disk_file was re-written as a larger object self.assertEquals(auditor_worker.stats_buckets["OVER"], 1) # pick up even more additional code coverage, misc paths auditor_worker.log_time = -1 auditor_worker.stats_sizes = [] auditor_worker.audit_all_objects(device_dirs=["sda", "sdb"]) self.assertEquals(auditor_worker.quarantines, pre_quarantines) self.assertEquals(auditor_worker.stats_buckets[1024], 3) self.assertEquals(auditor_worker.stats_buckets[10240], 0) self.assertEquals(auditor_worker.stats_buckets["OVER"], 1) def test_object_run_logging(self): logger = FakeLogger() auditor_worker = auditor.AuditorWorker(self.conf, logger, self.rcache, self.devices) auditor_worker.audit_all_objects(device_dirs=["sda"]) log_lines = logger.get_lines_for_level("info") self.assertTrue(len(log_lines) > 0) self.assertTrue(log_lines[0].index("ALL - parallel, sda")) logger = FakeLogger() auditor_worker = auditor.AuditorWorker(self.conf, logger, self.rcache, self.devices, zero_byte_only_at_fps=50) auditor_worker.audit_all_objects(device_dirs=["sda"]) log_lines = logger.get_lines_for_level("info") self.assertTrue(len(log_lines) > 0) self.assertTrue(log_lines[0].index("ZBF - sda")) def test_object_run_once_no_sda(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines # pretend that we logged (and reset counters) just now auditor_worker.last_logged = time.time() data = "0" * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)} writer.put(metadata) os.write(writer._fd, "extra_data") auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_run_once_multi_devices(self): auditor_worker = 
auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) # pretend that we logged (and reset counters) just now auditor_worker.last_logged = time.time() timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines data = "0" * 10 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)} writer.put(metadata) auditor_worker.audit_all_objects() self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "ob", policy=POLICIES.legacy) data = "1" * 10 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)} writer.put(metadata) os.write(writer._fd, "extra_data") auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_run_fast_track_non_zero(self): self.auditor = auditor.ObjectAuditor(self.conf) self.auditor.log_time = 0 data = "0" * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { "ETag": etag, "X-Timestamp": str(normalize_timestamp(time.time())), "Content-Length": str(os.fstat(writer._fd).st_size), } writer.put(metadata) etag = md5() etag.update("1" + "0" * 1023) etag = etag.hexdigest() metadata["ETag"] = etag write_metadata(writer._fd, metadata) quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects") kwargs = {"mode": "once"} kwargs["zero_byte_fps"] = 50 self.auditor.run_audit(**kwargs) self.assertFalse(os.path.isdir(quarantine_path)) del (kwargs["zero_byte_fps"]) self.auditor.run_audit(**kwargs) self.assertTrue(os.path.isdir(quarantine_path)) def setup_bad_zero_byte(self, timestamp=None): if timestamp is None: timestamp = Timestamp(time.time()) self.auditor = auditor.ObjectAuditor(self.conf) self.auditor.log_time = 0 etag = md5() with self.disk_file.create() as writer: etag = etag.hexdigest() metadata = {"ETag": etag, "X-Timestamp": timestamp.internal, "Content-Length": 10} writer.put(metadata) etag = md5() etag = etag.hexdigest() metadata["ETag"] = etag write_metadata(writer._fd, metadata) def test_object_run_fast_track_all(self): self.setup_bad_zero_byte() kwargs = {"mode": "once"} self.auditor.run_audit(**kwargs) quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects") self.assertTrue(os.path.isdir(quarantine_path)) def test_object_run_fast_track_zero(self): self.setup_bad_zero_byte() kwargs = {"mode": "once"} kwargs["zero_byte_fps"] = 50 self.auditor.run_audit(**kwargs) quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects") self.assertTrue(os.path.isdir(quarantine_path)) def test_object_run_fast_track_zero_check_closed(self): rat = [False] class FakeFile(DiskFile): def _quarantine(self, data_file, msg): rat[0] = True DiskFile._quarantine(self, data_file, msg) self.setup_bad_zero_byte() with mock.patch("swift.obj.diskfile.DiskFileManager.diskfile_cls", FakeFile): kwargs = {"mode": "once"} kwargs["zero_byte_fps"] = 50 self.auditor.run_audit(**kwargs) quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects") self.assertTrue(os.path.isdir(quarantine_path)) self.assertTrue(rat[0]) @mock.patch.object(auditor.ObjectAuditor, "run_audit") @mock.patch("os.fork", return_value=0) def 
test_with_inaccessible_object_location(self, mock_os_fork, mock_run_audit): # Need to ensure that any failures in run_audit do # not prevent sys.exit() from running. Otherwise we get # zombie processes. e = OSError("permission denied") mock_run_audit.side_effect = e self.auditor = auditor.ObjectAuditor(self.conf) self.assertRaises(SystemExit, self.auditor.fork_child, self) def test_with_only_tombstone(self): # sanity check that auditor doesn't touch solitary tombstones ts_iter = make_timestamp_iter() self.setup_bad_zero_byte(timestamp=ts_iter.next()) self.disk_file.delete(ts_iter.next()) files = os.listdir(self.disk_file._datadir) self.assertEqual(1, len(files)) self.assertTrue(files[0].endswith("ts")) kwargs = {"mode": "once"} self.auditor.run_audit(**kwargs) files_after = os.listdir(self.disk_file._datadir) self.assertEqual(files, files_after) def test_with_tombstone_and_data(self): # rsync replication could leave a tombstone and data file in object # dir - verify they are both removed during audit ts_iter = make_timestamp_iter() ts_tomb = ts_iter.next() ts_data = ts_iter.next() self.setup_bad_zero_byte(timestamp=ts_data) tomb_file_path = os.path.join(self.disk_file._datadir, "%s.ts" % ts_tomb.internal) with open(tomb_file_path, "wb") as fd: write_metadata(fd, {"X-Timestamp": ts_tomb.internal}) files = os.listdir(self.disk_file._datadir) self.assertEqual(2, len(files)) self.assertTrue(os.path.basename(tomb_file_path) in files, files) kwargs = {"mode": "once"} self.auditor.run_audit(**kwargs) self.assertFalse(os.path.exists(self.disk_file._datadir)) def test_sleeper(self): with mock.patch("time.sleep", mock.MagicMock()) as mock_sleep: auditor.SLEEP_BETWEEN_AUDITS = 0.10 my_auditor = auditor.ObjectAuditor(self.conf) my_auditor._sleep() mock_sleep.assert_called_with(auditor.SLEEP_BETWEEN_AUDITS) def test_run_parallel_audit(self): class StopForever(Exception): pass class Bogus(Exception): pass class ObjectAuditorMock(object): check_args = () check_kwargs = {} check_device_dir = None fork_called = 0 master = 0 wait_called = 0 def mock_run(self, *args, **kwargs): self.check_args = args self.check_kwargs = kwargs if "zero_byte_fps" in kwargs: self.check_device_dir = kwargs.get("device_dirs") def mock_sleep_stop(self): raise StopForever("stop") def mock_sleep_continue(self): return def mock_audit_loop_error(self, parent, zbo_fps, override_devices=None, **kwargs): raise Bogus("exception") def mock_fork(self): self.fork_called += 1 if self.master: return self.fork_called else: return 0 def mock_wait(self): self.wait_called += 1 return (self.wait_called, 0) for i in string.ascii_letters[2:26]: mkdirs(os.path.join(self.devices, "sd%s" % i)) my_auditor = auditor.ObjectAuditor( dict(devices=self.devices, mount_check="false", zero_byte_files_per_second=89, concurrency=1) ) mocker = ObjectAuditorMock() my_auditor.logger.exception = mock.MagicMock() real_audit_loop = my_auditor.audit_loop my_auditor.audit_loop = mocker.mock_audit_loop_error my_auditor.run_audit = mocker.mock_run was_fork = os.fork was_wait = os.wait os.fork = mocker.mock_fork os.wait = mocker.mock_wait try: my_auditor._sleep = mocker.mock_sleep_stop my_auditor.run_once(zero_byte_fps=50) my_auditor.logger.exception.assert_called_once_with("ERROR auditing: exception") my_auditor.logger.exception.reset_mock() self.assertRaises(StopForever, my_auditor.run_forever) my_auditor.logger.exception.assert_called_once_with("ERROR auditing: exception") my_auditor.audit_loop = real_audit_loop self.assertRaises(StopForever, my_auditor.run_forever, 
zero_byte_fps=50) self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 50) self.assertEquals(mocker.fork_called, 0) self.assertRaises(SystemExit, my_auditor.run_once) self.assertEquals(mocker.fork_called, 1) self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 89) self.assertEquals(mocker.check_device_dir, []) self.assertEquals(mocker.check_args, ()) device_list = ["sd%s" % i for i in string.ascii_letters[2:10]] device_string = ",".join(device_list) device_string_bogus = device_string + ",bogus" mocker.fork_called = 0 self.assertRaises(SystemExit, my_auditor.run_once, devices=device_string_bogus) self.assertEquals(mocker.fork_called, 1) self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 89) self.assertEquals(sorted(mocker.check_device_dir), device_list) mocker.master = 1 mocker.fork_called = 0 self.assertRaises(StopForever, my_auditor.run_forever) # Fork is called 2 times since the zbf process is forked just # once before self._sleep() is called and StopForever is raised # Also wait is called just once before StopForever is raised self.assertEquals(mocker.fork_called, 2) self.assertEquals(mocker.wait_called, 1) my_auditor._sleep = mocker.mock_sleep_continue my_auditor.concurrency = 2 mocker.fork_called = 0 mocker.wait_called = 0 my_auditor.run_once() # Fork is called no. of devices + (no. of devices)/2 + 1 times # since zbf process is forked (no.of devices)/2 + 1 times no_devices = len(os.listdir(self.devices)) self.assertEquals(mocker.fork_called, no_devices + no_devices / 2 + 1) self.assertEquals(mocker.wait_called, no_devices + no_devices / 2 + 1) finally: os.fork = was_fork os.wait = was_wait
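# A test module like this would normally close with the standard unittest
# entry point so the file can be run directly, mirroring Swift's other test
# modules:
if __name__ == '__main__':
    unittest.main()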
class ObjectController(object): """Implements the WSGI application for the Swift Object Server.""" def __init__(self, conf, logger=None): """ Creates a new WSGI application for the Swift Object Server. An example configuration is given at <source-dir>/etc/object-server.conf-sample or /etc/swift/object-server.conf-sample. """ self.logger = logger or get_logger(conf, log_route='object-server') self.node_timeout = int(conf.get('node_timeout', 3)) self.conn_timeout = float(conf.get('conn_timeout', 0.5)) self.client_timeout = int(conf.get('client_timeout', 60)) self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536)) self.network_chunk_size = int(conf.get('network_chunk_size', 65536)) self.log_requests = config_true_value(conf.get('log_requests', 'true')) self.max_upload_time = int(conf.get('max_upload_time', 86400)) self.slow = int(conf.get('slow', 0)) self.keep_cache_private = \ config_true_value(conf.get('keep_cache_private', 'false')) replication_server = conf.get('replication_server', None) if replication_server is not None: replication_server = config_true_value(replication_server) self.replication_server = replication_server default_allowed_headers = ''' content-disposition, content-encoding, x-delete-at, x-object-manifest, x-static-large-object, ''' extra_allowed_headers = [ header.strip().lower() for header in conf.get( 'allowed_headers', default_allowed_headers).split(',') if header.strip() ] self.allowed_headers = set() for header in extra_allowed_headers: if header not in DATAFILE_SYSTEM_META: self.allowed_headers.add(header) self.auto_create_account_prefix = \ conf.get('auto_create_account_prefix') or '.' self.expiring_objects_account = self.auto_create_account_prefix + \ (conf.get('expiring_objects_account_name') or 'expiring_objects') self.expiring_objects_container_divisor = \ int(conf.get('expiring_objects_container_divisor') or 86400) # Initialization was successful, so now apply the network chunk size # parameter as the default read / write buffer size for the network # sockets. # # NOTE WELL: This is a class setting, so until we can set this on a # per-connection basis, this affects reading and writing on ALL # sockets, those between the proxy servers and external clients, and # those between the proxy servers and the other internal servers. # # ** Because the primary motivation for this is to optimize how data # is written back to the proxy server, we could use the value from the # disk_chunk_size parameter. However, it affects all created sockets # using this class so we have chosen to tie it to the # network_chunk_size parameter value instead. socket._fileobject.default_bufsize = self.network_chunk_size # Provide further setup specific to an object server implementation. self.setup(conf) def setup(self, conf): """ Implementation specific setup. This method is called at the very end by the constructor to allow a specific implementation to modify existing attributes or add its own attributes. :param conf: WSGI configuration parameter """ # Common on-disk hierarchy shared across account, container and object # servers. self._diskfile_mgr = DiskFileManager(conf, self.logger) # This is populated by global_conf_callback way below as the semaphore # is shared by all workers.
if 'replication_semaphore' in conf: # The value was put in a list so it could get past paste self.replication_semaphore = conf['replication_semaphore'][0] else: self.replication_semaphore = None self.replication_failure_threshold = int( conf.get('replication_failure_threshold') or 100) self.replication_failure_ratio = float( conf.get('replication_failure_ratio') or 1.0) def get_diskfile(self, device, partition, account, container, obj, policy_idx, **kwargs): """ Utility method for instantiating a DiskFile object supporting a given REST API. An implementation of the object server that wants to use a different DiskFile class would simply over-ride this method to provide that behavior. """ return self._diskfile_mgr.get_diskfile( device, partition, account, container, obj, policy_idx, **kwargs) def async_update(self, op, account, container, obj, host, partition, contdevice, headers_out, objdevice, policy_idx): """ Sends or saves an async update. :param op: operation performed (ex: 'PUT', or 'DELETE') :param account: account name for the object :param container: container name for the object :param obj: object name :param host: host that the container is on :param partition: partition that the container is on :param contdevice: device name that the container is on :param headers_out: dictionary of headers to send in the container request :param objdevice: device name that the object is in :param policy_idx: the associated storage policy index """ headers_out['user-agent'] = 'obj-server %s' % os.getpid() full_path = '/%s/%s/%s' % (account, container, obj) if all([host, partition, contdevice]): try: with ConnectionTimeout(self.conn_timeout): ip, port = host.rsplit(':', 1) conn = http_connect(ip, port, contdevice, partition, op, full_path, headers_out) with Timeout(self.node_timeout): response = conn.getresponse() response.read() if is_success(response.status): return else: self.logger.error(_( 'ERROR Container update failed ' '(saving for async update later): %(status)d ' 'response from %(ip)s:%(port)s/%(dev)s'), {'status': response.status, 'ip': ip, 'port': port, 'dev': contdevice}) except (Exception, Timeout): self.logger.exception(_( 'ERROR container update failed with ' '%(ip)s:%(port)s/%(dev)s (saving for async update later)'), {'ip': ip, 'port': port, 'dev': contdevice}) data = {'op': op, 'account': account, 'container': container, 'obj': obj, 'headers': headers_out} timestamp = headers_out['x-timestamp'] self._diskfile_mgr.pickle_async_update(objdevice, account, container, obj, data, timestamp, policy_idx) def container_update(self, op, account, container, obj, request, headers_out, objdevice, policy_idx): """ Update the container when objects are updated. :param op: operation performed (ex: 'PUT', or 'DELETE') :param account: account name for the object :param container: container name for the object :param obj: object name :param request: the original request object driving the update :param headers_out: dictionary of headers to send in the container request(s) :param objdevice: device name that the object is in """ headers_in = request.headers conthosts = [h.strip() for h in headers_in.get('X-Container-Host', '').split(',')] contdevices = [d.strip() for d in headers_in.get('X-Container-Device', '').split(',')] contpartition = headers_in.get('X-Container-Partition', '') if len(conthosts) != len(contdevices): # This shouldn't happen unless there's a bug in the proxy, # but if there is, we want to know about it. 
self.logger.error(_('ERROR Container update failed: different ' 'numbers of hosts and devices in request: ' '"%s" vs "%s"') % (headers_in.get('X-Container-Host', ''), headers_in.get('X-Container-Device', ''))) return if contpartition: updates = zip(conthosts, contdevices) else: updates = [] headers_out['x-trans-id'] = headers_in.get('x-trans-id', '-') headers_out['referer'] = request.as_referer() headers_out[POLICY_INDEX] = str(policy_idx) for conthost, contdevice in updates: self.async_update(op, account, container, obj, conthost, contpartition, contdevice, headers_out, objdevice, policy_idx) def delete_at_update(self, op, delete_at, account, container, obj, request, objdevice): """ Update the expiring objects container when objects are updated. :param op: operation performed (ex: 'PUT', or 'DELETE') :param delete_at: scheduled delete in UNIX seconds, int :param account: account name for the object :param container: container name for the object :param obj: object name :param request: the original request driving the update :param objdevice: device name that the object is in """ if config_true_value( request.headers.get('x-backend-replication', 'f')): return delete_at = normalize_delete_at_timestamp(delete_at) updates = [(None, None)] partition = None hosts = contdevices = [None] headers_in = request.headers headers_out = HeaderKeyDict({ 'x-timestamp': headers_in['x-timestamp'], 'x-trans-id': headers_in.get('x-trans-id', '-'), 'referer': request.as_referer()}) if op != 'DELETE': delete_at_container = headers_in.get('X-Delete-At-Container', None) if not delete_at_container: self.logger.warning( 'X-Delete-At-Container header must be specified for ' 'expiring objects background %s to work properly. Making ' 'best guess as to the container name for now.' % op) # TODO(gholt): In a future release, change the above warning to # a raised exception and remove the guess code below. delete_at_container = ( int(delete_at) / self.expiring_objects_container_divisor * self.expiring_objects_container_divisor) partition = headers_in.get('X-Delete-At-Partition', None) hosts = headers_in.get('X-Delete-At-Host', '') contdevices = headers_in.get('X-Delete-At-Device', '') updates = [upd for upd in zip((h.strip() for h in hosts.split(',')), (c.strip() for c in contdevices.split(','))) if all(upd) and partition] if not updates: updates = [(None, None)] headers_out['x-size'] = '0' headers_out['x-content-type'] = 'text/plain' headers_out['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e' else: # DELETEs of old expiration data have no way of knowing what the # old X-Delete-At-Container was at the time of the initial setting # of the data, so a best guess is made here. # Worst case is a DELETE is issued now for something that doesn't # exist there and the original data is left where it is, where # it will be ignored when the expirer eventually tries to issue the # object DELETE later since the X-Delete-At value won't match up. 
delete_at_container = str( int(delete_at) / self.expiring_objects_container_divisor * self.expiring_objects_container_divisor) delete_at_container = normalize_delete_at_timestamp( delete_at_container) for host, contdevice in updates: self.async_update( op, self.expiring_objects_account, delete_at_container, '%s-%s/%s/%s' % (delete_at, account, container, obj), host, partition, contdevice, headers_out, objdevice, 0) @public @timing_stats() def POST(self, request): """Handle HTTP POST requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = \ get_name_and_placement(request, 5, 5, True) if 'x-timestamp' not in request.headers or \ not check_float(request.headers['x-timestamp']): return HTTPBadRequest(body='Missing timestamp', request=request, content_type='text/plain') new_delete_at = int(request.headers.get('X-Delete-At') or 0) if new_delete_at and new_delete_at < time.time(): return HTTPBadRequest(body='X-Delete-At in past', request=request, content_type='text/plain') try: disk_file = self.get_diskfile( device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: orig_metadata = disk_file.read_metadata() except (DiskFileNotExist, DiskFileQuarantined): return HTTPNotFound(request=request) orig_timestamp = orig_metadata.get('X-Timestamp', '0') if orig_timestamp >= request.headers['x-timestamp']: return HTTPConflict(request=request) metadata = {'X-Timestamp': request.headers['x-timestamp']} metadata.update(val for val in request.headers.iteritems() if is_user_meta('object', val[0])) for header_key in self.allowed_headers: if header_key in request.headers: header_caps = header_key.title() metadata[header_caps] = request.headers[header_key] orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0) if orig_delete_at != new_delete_at: if new_delete_at: self.delete_at_update('PUT', new_delete_at, account, container, obj, request, device) if orig_delete_at: self.delete_at_update('DELETE', orig_delete_at, account, container, obj, request, device) disk_file.write_metadata(metadata) return HTTPAccepted(request=request) @public @timing_stats() def PUT(self, request): """Handle HTTP PUT requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = \ get_name_and_placement(request, 5, 5, True) if 'x-timestamp' not in request.headers or \ not check_float(request.headers['x-timestamp']): return HTTPBadRequest(body='Missing timestamp', request=request, content_type='text/plain') error_response = check_object_creation(request, obj) if error_response: return error_response new_delete_at = int(request.headers.get('X-Delete-At') or 0) if new_delete_at and new_delete_at < time.time(): return HTTPBadRequest(body='X-Delete-At in past', request=request, content_type='text/plain') try: fsize = request.message_length() except ValueError as e: return HTTPBadRequest(body=str(e), request=request, content_type='text/plain') try: disk_file = self.get_diskfile( device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: orig_metadata = disk_file.read_metadata() except (DiskFileNotExist, DiskFileQuarantined): orig_metadata = {} # Checks for If-None-Match if request.if_none_match is not None and orig_metadata: if '*' in request.if_none_match: # File exists already so return 412 return HTTPPreconditionFailed(request=request) if 
orig_metadata.get('ETag') in request.if_none_match: # The current ETag matches, so return 412 return HTTPPreconditionFailed(request=request) orig_timestamp = orig_metadata.get('X-Timestamp') if orig_timestamp and orig_timestamp >= request.headers['x-timestamp']: return HTTPConflict(request=request) orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0) upload_expiration = time.time() + self.max_upload_time etag = md5() elapsed_time = 0 try: with disk_file.create(size=fsize) as writer: upload_size = 0 def timeout_reader(): with ChunkReadTimeout(self.client_timeout): return request.environ['wsgi.input'].read( self.network_chunk_size) try: for chunk in iter(lambda: timeout_reader(), ''): start_time = time.time() if start_time > upload_expiration: self.logger.increment('PUT.timeouts') return HTTPRequestTimeout(request=request) etag.update(chunk) upload_size = writer.write(chunk) elapsed_time += time.time() - start_time except ChunkReadTimeout: return HTTPRequestTimeout(request=request) if upload_size: self.logger.transfer_rate( 'PUT.' + device + '.timing', elapsed_time, upload_size) if fsize is not None and fsize != upload_size: return HTTPClientDisconnect(request=request) etag = etag.hexdigest() if 'etag' in request.headers and \ request.headers['etag'].lower() != etag: return HTTPUnprocessableEntity(request=request) metadata = { 'X-Timestamp': request.headers['x-timestamp'], 'Content-Type': request.headers['content-type'], 'ETag': etag, 'Content-Length': str(upload_size), } metadata.update(val for val in request.headers.iteritems() if is_user_meta('object', val[0])) for header_key in ( request.headers.get('X-Backend-Replication-Headers') or self.allowed_headers): if header_key in request.headers: header_caps = header_key.title() metadata[header_caps] = request.headers[header_key] writer.put(metadata) except DiskFileNoSpace: return HTTPInsufficientStorage(drive=device, request=request) if orig_delete_at != new_delete_at: if new_delete_at: self.delete_at_update( 'PUT', new_delete_at, account, container, obj, request, device) if orig_delete_at: self.delete_at_update( 'DELETE', orig_delete_at, account, container, obj, request, device) self.container_update( 'PUT', account, container, obj, request, HeaderKeyDict({ 'x-size': metadata['Content-Length'], 'x-content-type': metadata['Content-Type'], 'x-timestamp': metadata['X-Timestamp'], 'x-etag': metadata['ETag']}), device, policy_idx) return HTTPCreated(request=request, etag=etag) @public @timing_stats() def GET(self, request): """Handle HTTP GET requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = \ get_name_and_placement(request, 5, 5, True) keep_cache = self.keep_cache_private or ( 'X-Auth-Token' not in request.headers and 'X-Storage-Token' not in request.headers) try: disk_file = self.get_diskfile( device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: with disk_file.open(): metadata = disk_file.get_metadata() obj_size = int(metadata['Content-Length']) file_x_ts = metadata['X-Timestamp'] file_x_ts_flt = float(file_x_ts) keep_cache = (self.keep_cache_private or ('X-Auth-Token' not in request.headers and 'X-Storage-Token' not in request.headers)) response = Response( app_iter=disk_file.reader(keep_cache=keep_cache), request=request, conditional_response=True) response.headers['Content-Type'] = metadata.get( 'Content-Type', 'application/octet-stream') for key, value in 
metadata.iteritems(): if is_user_meta('object', key) or \ key.lower() in self.allowed_headers: response.headers[key] = value response.etag = metadata['ETag'] response.last_modified = math.ceil(file_x_ts_flt) response.content_length = obj_size try: response.content_encoding = metadata[ 'Content-Encoding'] except KeyError: pass response.headers['X-Timestamp'] = file_x_ts resp = request.get_response(response) except (DiskFileNotExist, DiskFileQuarantined) as e: headers = {} if hasattr(e, 'timestamp'): headers['X-Timestamp'] = e.timestamp resp = HTTPNotFound(request=request, headers=headers, conditional_response=True) return resp @public @timing_stats(sample_rate=0.8) def HEAD(self, request): """Handle HTTP HEAD requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = \ get_name_and_placement(request, 5, 5, True) try: disk_file = self.get_diskfile( device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: metadata = disk_file.read_metadata() except (DiskFileNotExist, DiskFileQuarantined) as e: headers = {} if hasattr(e, 'timestamp'): headers['X-Timestamp'] = e.timestamp return HTTPNotFound(request=request, headers=headers, conditional_response=True) response = Response(request=request, conditional_response=True) response.headers['Content-Type'] = metadata.get( 'Content-Type', 'application/octet-stream') for key, value in metadata.iteritems(): if is_user_meta('object', key) or \ key.lower() in self.allowed_headers: response.headers[key] = value response.etag = metadata['ETag'] ts = metadata['X-Timestamp'] response.last_modified = math.ceil(float(ts)) # Needed for container sync feature response.headers['X-Timestamp'] = ts response.content_length = int(metadata['Content-Length']) try: response.content_encoding = metadata['Content-Encoding'] except KeyError: pass return response @public @timing_stats() def DELETE(self, request): """Handle HTTP DELETE requests for the Swift Object Server.""" device, partition, account, container, obj, policy_idx = \ get_name_and_placement(request, 5, 5, True) if 'x-timestamp' not in request.headers or \ not check_float(request.headers['x-timestamp']): return HTTPBadRequest(body='Missing timestamp', request=request, content_type='text/plain') try: disk_file = self.get_diskfile( device, partition, account, container, obj, policy_idx=policy_idx) except DiskFileDeviceUnavailable: return HTTPInsufficientStorage(drive=device, request=request) try: orig_metadata = disk_file.read_metadata() except DiskFileExpired as e: orig_timestamp = e.timestamp orig_metadata = e.metadata response_class = HTTPNotFound except DiskFileDeleted as e: orig_timestamp = e.timestamp orig_metadata = {} response_class = HTTPNotFound except (DiskFileNotExist, DiskFileQuarantined): orig_timestamp = 0 orig_metadata = {} response_class = HTTPNotFound else: orig_timestamp = orig_metadata.get('X-Timestamp', 0) if orig_timestamp < request.headers['x-timestamp']: response_class = HTTPNoContent else: response_class = HTTPConflict orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0) try: req_if_delete_at_val = request.headers['x-if-delete-at'] req_if_delete_at = int(req_if_delete_at_val) except KeyError: pass except ValueError: return HTTPBadRequest( request=request, body='Bad X-If-Delete-At header value') else: if orig_delete_at != req_if_delete_at: return HTTPPreconditionFailed( request=request, body='X-If-Delete-At and X-Delete-At do not match') if 
orig_delete_at: self.delete_at_update('DELETE', orig_delete_at, account, container, obj, request, device) req_timestamp = request.headers['X-Timestamp'] if orig_timestamp < req_timestamp: disk_file.delete(req_timestamp) self.container_update( 'DELETE', account, container, obj, request, HeaderKeyDict({'x-timestamp': req_timestamp}), device, policy_idx) return response_class(request=request) @public @replication @timing_stats(sample_rate=0.1) def REPLICATE(self, request): """ Handle REPLICATE requests for the Swift Object Server. This is used by the object replicator to get hashes for directories. """ device, partition, suffix, policy_idx = \ get_name_and_placement(request, 2, 3, True) try: hashes = self._diskfile_mgr.get_hashes(device, partition, suffix, policy_idx) except DiskFileDeviceUnavailable: resp = HTTPInsufficientStorage(drive=device, request=request) else: resp = Response(body=pickle.dumps(hashes)) return resp @public @replication @timing_stats(sample_rate=0.1) def REPLICATION(self, request): return Response(app_iter=ssync_receiver.Receiver(self, request)()) def __call__(self, env, start_response): """WSGI Application entry point for the Swift Object Server.""" start_time = time.time() req = Request(env) self.logger.txn_id = req.headers.get('x-trans-id', None) if not check_utf8(req.path_info): res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL') else: try: # disallow methods which have not been marked 'public' try: method = getattr(self, req.method) getattr(method, 'publicly_accessible') replication_method = getattr(method, 'replication', False) if (self.replication_server is not None and self.replication_server != replication_method): raise AttributeError('Not allowed method.') except AttributeError: res = HTTPMethodNotAllowed() else: res = method(req) except DiskFileCollision: res = HTTPForbidden(request=req) except HTTPException as error_response: res = error_response except (Exception, Timeout): self.logger.exception(_( 'ERROR __call__ error with %(method)s' ' %(path)s '), {'method': req.method, 'path': req.path}) res = HTTPInternalServerError(body=traceback.format_exc()) trans_time = time.time() - start_time if self.log_requests: log_line = get_log_line(req, res, trans_time, '') if req.method in ('REPLICATE', 'REPLICATION') or \ 'X-Backend-Replication' in req.headers: self.logger.debug(log_line) else: self.logger.info(log_line) if req.method in ('PUT', 'DELETE'): slow = self.slow - trans_time if slow > 0: sleep(slow) return res(env, start_response)
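# The delete_at_update method above buckets X-Delete-At timestamps into
# divisor-sized containers: the timestamp is floor-divided by
# expiring_objects_container_divisor (86400 by default, i.e. one day) and
# multiplied back, so every object expiring in the same window maps to the
# same container of the auto-created expiring-objects account (the DELETE
# branch additionally runs the result through normalize_delete_at_timestamp).
# A minimal sketch of that bucketing; `bucket_container` is a hypothetical
# name used for illustration only.
def bucket_container(delete_at, divisor=86400):
    # round down to the start of the divisor-sized window
    return str(int(delete_at) // divisor * divisor)

assert bucket_container(1400000000) == '1399939200'
assert bucket_container(1400025599) == '1399939200'  # last second, same window
assert bucket_container(1400025600) == '1400025600'  # first second, next window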
class MetadataController(object): """ WSGI Controller for metadata server """ save_headers = [ 'x-metadata-read', 'x-metadata-write', 'x-metadata-sync-key', 'x-metadata-sync-to' ] def __init__(self, conf, logger=None): # location/directory of the metadata database (meta.db) self.location = conf.get('location', '/srv/node/sdb1/metadata/') # path to the actual file #self.db_file = os.path.join(self.location, 'meta.db') self.logger = logger or get_logger(conf, log_route='metadata-server') self.root = conf.get('devices', '/srv/node') # workaround for device listings self.node_count = conf.get('nodecount', '8') self.devicelist = [] for x in range(0, int(self.node_count)): self.devicelist.append(conf.get('device' + str(x), '')) self.mount_check = config_true_value(conf.get('mount_check', 'true')) self.node_timeout = int(conf.get('node_timeout', 3)) self.conn_timeout = float(conf.get('conn_timeout', 0.5)) replication_server = conf.get('replication_server', None) if replication_server is not None: replication_server = config_true_value(replication_server) self.replication_server = replication_server self.allowed_sync_hosts = [ h.strip() for h in conf.get('allowed_sync_hosts', '127.0.0.1').split(',') if h.strip() ] self.replicator_rpc = ReplicatorRpc( self.root, DATADIR, MetadataBroker, self.mount_check, logger=self.logger ) self.diskfile_mgr = DiskFileManager(conf, self.logger) self.db_ip = conf.get('db_ip', '127.0.0.1') self.db_port = int(conf.get('db_port', 2424)) self.db_user = conf.get('db_user', 'root') self.db_pw = conf.get('db_pw', 'root') if config_true_value(conf.get('allow_versions', 'f')): self.save_headers.append('x-versions-location') swift.common.db.DB_PREALLOCATION = config_true_value( conf.get('db_preallocation', 'f')) self.broker = self._get_metadata_broker() self.broker.initialize() def _get_metadata_broker(self, **kwargs): """ Returns an instance of the DB abstraction layer object (broker) """ return MetadataBroker(self.db_ip, self.db_port, self.db_user, self.db_pw) def check_attrs(self, attrs, acc, con, obj): """ Verify that attributes are valid Checks the attr list against a list of system attributes and allows for custom metadata.
returns: boolean whether the attrs are valid """ for attr in attrs.split(','): if attr.startswith('object_meta') or \ attr.startswith('container_meta') or \ attr.startswith('account_meta'): pass elif attr not in [ 'object_uri', 'object_name', 'object_account_name', 'object_container_name', 'object_location', 'object_uri_create_time', 'object_last_modified_time', 'object_last_changed_time', 'object_delete_time', 'object_last_activity_time', 'object_etag_hash', 'object_content_type', 'object_content_length', 'object_content_encoding', 'object_content_disposition', 'object_content_language', 'object_cache_control', 'object_delete_at', 'object_manifest_type', 'object_manifest', 'object_access_control_allow_origin', 'object_access_control_allow_credentials', 'object_access_control_expose_headers', 'object_access_control_max_age', 'object_access_control_allow_methods', 'object_access_control_allow_headers', 'object_origin', 'object_access_control_request_method', 'object_access_control_request_headers', 'object_meta', 'container_uri', 'container_name', 'container_account_name', 'container_create_time', 'container_last_modified_time', 'container_last_changed_time', 'container_delete_time', 'container_last_activity_time', 'container_read_permissions', 'container_write_permissions', 'container_sync_to', 'container_sync_key', 'container_versions_location', 'container_object_count', 'container_bytes_used', 'container_meta', 'account_uri', 'account_name', 'account_tenant_id', 'account_first_use_time', 'account_last_modified_time', 'account_last_changed_time', 'account_delete_time', 'account_last_activity_time', 'account_container_count', 'account_object_count', 'account_bytes_used', 'account_meta', 'all_attrs', 'all_system_attrs', 'all_meta_attrs', 'all_account_attrs', 'all_account_system_attrs', 'all_account_meta_attrs', 'all_container_attrs', 'all_container_system_attrs', 'all_container_meta_attrs', 'all_object_attrs', 'all_object_system_attrs', 'all_object_meta_attrs']: return False return True @public @timing_stats() def GET(self, req): """ Handle HTTP GET requests Build SQL queries piece by piece and then execute Custom attributes need to be handled specially, since they exist in a separate table """ #broker = self._get_metadata_broker() #broker.initialize() base_version, acc, con, obj = split_path(req.path, 1, 4, True) if 'sorted' in req.headers: sort_value_list = req.headers['sorted'] if sort_value_list == '': sort_value_list = 'uri' toSort = True else: toSort = False if 'attributes' in req.headers: attrs = req.headers['attributes'] # if no attributes list is passed in, include the uri of everything # in scope; results can then come from multiple levels of scope else: attrs = "object_uri,container_uri,account_uri" attrs, all_obj_meta, all_con_meta, all_acc_meta = \ eval_superset(attrs.split(",")) format = "text/plain" if self.check_attrs(attrs, acc, con, obj) or attrs == '': accAttrs, conAttrs, objAttrs, superAttrs, customAttrs = \ split_attrs_by_scope(attrs) """ If an entity is in scope but none of its system attributes were requested, add its uri so that it still appears in the list returned from the query. After querying for custom attrs, delete any entity that came back empty.
""" if all_obj_meta and objAttrs == "": objAttrs = "object_uri" if all_con_meta and conAttrs == "": conAttrs = "container_uri" if all_acc_meta and accAttrs == "": accAttrs = "account_uri" # Builds initial query containing the # split attributes for each item type accQuery = self.broker.get_attributes_query(acc, con, obj, accAttrs) conQuery = self.broker.get_attributes_query(acc, con, obj, conAttrs) objQuery = self.broker.get_attributes_query(acc, con, obj, objAttrs) # If there is a query in the request add it to the end # of the WHERE clause of the SQL if 'query' in req.headers: query = req.headers['query'] accQuery = self.broker.get_uri_query(accQuery, query) conQuery = self.broker.get_uri_query(conQuery, query) objQuery = self.broker.get_uri_query(objQuery, query) # if successful query add the results to the end of the # accumulator list ret = [] if not accQuery.startswith("BAD"): ret.extend(self.broker.execute_query( accQuery, acc, con, obj, 'account_uri' in attrs.split(','))) if not conQuery.startswith("BAD"): ret.extend(self.broker.execute_query( conQuery, acc, con, obj, 'container_uri' in attrs.split(','))) if not objQuery.startswith("BAD"): ret.extend(self.broker.execute_query( objQuery, acc, con, obj, 'object_uri' in attrs.split(','))) # query the custom table ret = self.broker.custom_attributes_query( customAttrs, ret, all_obj_meta, all_con_meta, all_acc_meta) """ Do the deletion thing mentioned above """ ret = [x for x in ret if x[x.keys()[0]] != {}] if toSort: sorter = Sort_metadata() ret = sorter.sort_data(ret, sort_value_list.split(",")) # default format is plain text # can choose between json/xml as well # no error handling done right now # just default everything to plain if spelling error if "format" in req.headers: if req.headers['format'] == "json": format = "application/json" out = output_json(ret) elif req.headers['format'] == "xml": format = "application/xml" out = output_xml(ret) else: out = output_plain(ret) else: out = output_plain(ret) status = 200 else: out = "One or more attributes not supported" status = 400 format = "text/plain" # Returns the HTTP Response object with the result of the API request return Response( request=req, body=out + "\n", content_type=format, status=status) @public #@timing_stats() #TODO: reorganize code to generalize repeated calls #TODO: abstract data/object type names for generic calls def PUT(self, req): version, acc, con, obj = split_path(req.path, 1, 4, True) stor_policy = req.headers['storage_policy'] ring = POLICIES.get_object_ring(stor_policy, '/etc/swift') #broker = self._get_metadata_broker() #broker.initialize() #Handle Container PUT if not obj: hsh = hash_path(acc, con) part = ring.get_part(acc, con) db_dir = storage_directory(swift.container.backend.DATADIR, part, hsh) nodes = ring.get_part_nodes(part) for node in nodes: for item in self.devicelist: if node['device'] in item: try: path = os.path.join(self.root + item, db_dir, hsh + '.db') #TODO: move kwargs kwargs = {'account':acc, 'container':con, 'logger':self.logger} md_broker= swift.container.backend.ContainerBroker(path, **kwargs) md = md_broker.get_info() md.update( (key, value) for key, (value, timestamp) in md_broker.metadata.iteritems() if value != '' and is_sys_or_user_meta('container', key)) sys_md = format_con_metadata(md) user_md = format_custom_metadata(md) if 'X-Container-Read' in req.headers: sys_md['container_read_permissions'] = req.headers['X-Container-Read'] if 'X-Container-Write' in req.headers: sys_md['container_write_permissions'] = 
req.headers['X-Container-Write'] #TODO: insert container_last_activity_time #TODO: split meta user/sys #TODO: insert meta self.broker.insert_container_md([sys_md]) return except DatabaseConnectionError as e: self.logger.warn("DatabaseConnectionError: " + e.path + "\n") pass except: self.logger.warn("%s: %s\n"%(str(sys.exc_info()[0]),str(sys.exc_info()[1]))) pass #handle object PUT else: part = ring.get_part(acc, con, obj) nodes = ring.get_part_nodes(part) for node in nodes: for item in self.devicelist: if node['device'] in item: try: df = self.diskfile_mgr.get_diskfile(item, part, acc, con, obj, stor_policy) md = df.read_metadata() sys_md = format_obj_metadata(md) #df._data_file is a direct path to the objects data sys_md['object_location'] = df._data_file user_md = format_custom_metadata(md) #TODO: insert user meta and sys meta self.broker.insert_object_md([sys_md]) except: self.logger.warn("%s: %s\n"%(str(sys.exc_info()[0]),str(sys.exc_info()[1]))) pass return @public @timing_stats() def DELETE(self, req): version, acc, con, obj = split_path(req.path, 1, 4, True) timestamp = Timestamp(time.time()).isoformat() data_type = '' md = {} if not con and not obj: #do nothing. accounts cannot be deleted return elif not obj: md = format_con_metadata(md) md['container_delete_time'] = timestamp md['container_last_activity_time'] = timestamp data_type = 'container' for item in \ (data_type + '_uri', data_type + '_name'): if item in md: del md[item] #TODO: overwrite container metadata #TODO: delete container custom metadata else: md = format_obj_metadata(md) md['object_delete_time'] = timestamp md['object_last_activity_time'] = timestamp data_type = 'object' for item in \ (data_type + '_uri', data_type + '_name'): if item in md: del md[item] #TODO: overwrite object metadata #TODO: delete object user meta return #TODO: generalize strings used for repeat calls @public #@timing_stats() def POST(self, req): version, acc, con, obj = split_path(req.path, 1, 4, True) stor_policy = req.headers['storage_policy'] ring = POLICIES.get_object_ring(stor_policy, '/etc/swift') if not con and not obj: meta_type = 'account' kwargs = {'account':acc, 'logger':self.logger} data_dir = swift.account.backend.DATADIR hsh = hash_path(acc) part = ring.get_part(acc) db_dir = storage_directory(data_dir, part, hsh) nodes = ring.get_part_nodes(part) for node in nodes: for item in self.devicelist: if node['device'] in item: try: path = os.path.join(self.root + item, db_dir, hsh + '.db') broker = swift.account.backend.AccountBroker(path, **kwargs) md = broker.get_info() md.update( (key, value) for key, (value, timestamp) in broker.metadata.iteritems() if value != '' and is_sys_or_user_meta(meta_type, key)) sys_md = format_acc_metadata(md) user_md = format_custom_metadata(md) #TODO: call overwrite_account_metadata #TODO: call overwrite_custom_metadata return except: self.logger.warn("%s: %s\n"%(str(sys.exc_info()[0]),str(sys.exc_info()[1]))) pass #Handle Container POST elif not obj: meta_type = 'container' kwargs = {'account':acc, 'container':con, 'logger':self.logger} data_dir = swift.container.backend.DATADIR try: hsh = hash_path(acc, con) part = ring.get_part(acc, con) db_dir = storage_directory(data_dir, part, hsh) nodes = ring.get_part_nodes(part) for node in nodes: for item in self.devicelist: if node['device'] in item: try: path = os.path.join(self.root + item, db_dir, hsh + '.db') broker = swift.container.backend.ContainerBroker(path, **kwargs) md = broker.get_info() md.update( (key, value) for key, (value, timestamp) in 
broker.metadata.iteritems() if value != '' and is_sys_or_user_meta('container', key)) sys_md = format_con_metadata(md) user_md = format_custom_metadata(md) if 'X-Container-Read' in req.headers: sys_md['container_read_permissions'] = req.headers['X-Container-Read'] if 'X-Container-Write' in req.headers: sys_md['container_write_permissions'] = req.headers['X-Container-Write'] #TODO: call overwrite_container_metadata #TODO: call overwrite_custom_metadata return except DatabaseConnectionError as e: self.logger.warn("DatabaseConnectionError: " + e.path + "\n") pass except: self.logger.warn("%s: %s\n"%(str(sys.exc_info()[0]),str(sys.exc_info()[1]))) pass else: part = ring.get_part(acc, con, obj) nodes = ring.get_part_nodes(part) for node in nodes: for item in self.devicelist: if node['device'] in item: try: df = self.diskfile_mgr.get_diskfile(item, part, acc, con, obj, stor_policy) md = df.read_metadata() sys_md = format_obj_metadata(md) user_md = format_custom_metadata(md) #TODO: call overwrite_object_metadata #TODO: call overwrite_custom_metadata except: self.logger.warn("%s: %s\n"%(str(sys.exc_info()[0]),str(sys.exc_info()[1]))) pass return @public @timing_stats() def COPY(self, req): version, acc, con, obj = split_path(req.path, 1, 4, True) def __call__(self, env, start_response): """ Boilerplate code for how the server's code gets called upon receiving a request. Taken directly from other servers. """ # start_time = time.time() req = Request(env) self.logger.txn_id = req.headers.get('x-trans-id', None) if not check_utf8(req.path_info): res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL') else: try: # disallow methods which have not been marked 'public' try: method = getattr(self, req.method) getattr(method, 'publicly_accessible') replication_method = getattr(method, 'replication', False) if (self.replication_server is not None and self.replication_server != replication_method): raise AttributeError('Not allowed method.') except AttributeError: res = HTTPMethodNotAllowed() else: res = method(req) except HTTPException as error_response: res = error_response except (Exception, Timeout): self.logger.exception(_( 'ERROR __call__ error with %(method)s %(path)s '), {'method': req.method, 'path': req.path}) res = HTTPInternalServerError(body=traceback.format_exc()) # trans_time = '%.4f' % (time.time() - start_time) # if self.log_requests: # log_message = '%s - - [%s] "%s %s" %s %s "%s" "%s" "%s" %s' % ( # req.remote_addr, # time.strftime('%d/%b/%Y:%H:%M:%S +0000', # time.gmtime()), # req.method, req.path, # res.status.split()[0], res.content_length or '-', # req.headers.get('x-trans-id', '-'), # req.referer or '-', req.user_agent or '-', # trans_time) # if req.method.upper() == 'REPLICATE': # self.logger.debug(log_message) # else: # self.logger.info(log_message) return res(env, start_response)
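# A sketch of how a client might drive the metadata GET API above, assuming
# a reachable metadata server; host, port, and the account path are
# placeholders, and 'object_meta_color' stands in for any custom attribute
# (check_attrs accepts anything prefixed object_meta/container_meta/
# account_meta). The header names ('attributes', 'query', 'sorted',
# 'format') come from the handler; `list_object_metadata` is a hypothetical
# helper, not part of this module.
import httplib  # Python 2, matching the rest of this code

def list_object_metadata(host, port, account):
    conn = httplib.HTTPConnection(host, port)
    conn.request('GET', '/v1/%s' % account, headers={
        'attributes': 'object_uri,object_content_length,object_meta_color',
        'query': 'object_content_length > 1024',  # appended to the WHERE clause
        'sorted': 'object_uri',  # sort key list; '' falls back to 'uri'
        'format': 'json',  # 'json' or 'xml'; anything else yields plain text
    })
    resp = conn.getresponse()
    return resp.status, resp.read()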
class TestAuditor(unittest.TestCase): def setUp(self): self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor') self.devices = os.path.join(self.testdir, 'node') self.rcache = os.path.join(self.testdir, 'object.recon') self.logger = FakeLogger() rmtree(self.testdir, ignore_errors=1) mkdirs(os.path.join(self.devices, 'sda')) os.mkdir(os.path.join(self.devices, 'sdb')) # policy 0 self.objects = os.path.join(self.devices, 'sda', get_data_dir(0)) self.objects_2 = os.path.join(self.devices, 'sdb', get_data_dir(0)) os.mkdir(self.objects) # policy 1 self.objects_p1 = os.path.join(self.devices, 'sda', get_data_dir(1)) self.objects_2_p1 = os.path.join(self.devices, 'sdb', get_data_dir(1)) os.mkdir(self.objects_p1) self.parts = self.parts_p1 = {} for part in ['0', '1', '2', '3']: self.parts[part] = os.path.join(self.objects, part) self.parts_p1[part] = os.path.join(self.objects_p1, part) os.mkdir(os.path.join(self.objects, part)) os.mkdir(os.path.join(self.objects_p1, part)) self.conf = dict( devices=self.devices, mount_check='false', object_size_stats='10,100,1024,10240') self.df_mgr = DiskFileManager(self.conf, self.logger) # diskfiles for policy 0, 1 self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', 0) self.disk_file_p1 = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', 1) def tearDown(self): rmtree(os.path.dirname(self.testdir), ignore_errors=1) unit.xattr_data = {} def test_worker_conf_parms(self): def check_common_defaults(): self.assertEquals(auditor_worker.max_bytes_per_second, 10000000) self.assertEquals(auditor_worker.log_time, 3600) # test default values conf = dict( devices=self.devices, mount_check='false', object_size_stats='10,100,1024,10240') auditor_worker = auditor.AuditorWorker(conf, self.logger, self.rcache, self.devices) check_common_defaults() self.assertEquals(auditor_worker.diskfile_mgr.disk_chunk_size, 65536) self.assertEquals(auditor_worker.max_files_per_second, 20) self.assertEquals(auditor_worker.zero_byte_only_at_fps, 0) # test specified audit value overrides conf.update({'disk_chunk_size': 4096}) auditor_worker = auditor.AuditorWorker(conf, self.logger, self.rcache, self.devices, zero_byte_only_at_fps=50) check_common_defaults() self.assertEquals(auditor_worker.diskfile_mgr.disk_chunk_size, 4096) self.assertEquals(auditor_worker.max_files_per_second, 50) self.assertEquals(auditor_worker.zero_byte_only_at_fps, 50) def test_object_audit_extra_data(self): def run_tests(disk_file): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) data = '0' * 1024 etag = md5() with disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() timestamp = str(normalize_timestamp(time.time())) metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) pre_quarantines = auditor_worker.quarantines auditor_worker.object_audit( AuditLocation(disk_file._datadir, 'sda', '0')) self.assertEquals(auditor_worker.quarantines, pre_quarantines) os.write(writer._fd, 'extra_data') auditor_worker.object_audit( AuditLocation(disk_file._datadir, 'sda', '0')) self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) run_tests(self.disk_file) run_tests(self.disk_file_p1) def test_object_audit_diff_data(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) data = '0' * 1024 etag = md5() timestamp = str(normalize_timestamp(time.time())) with self.disk_file.create() as writer: writer.write(data) 
etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) pre_quarantines = auditor_worker.quarantines # remake so it will have metadata self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o') auditor_worker.object_audit( AuditLocation(self.disk_file._datadir, 'sda', '0')) self.assertEquals(auditor_worker.quarantines, pre_quarantines) etag = md5() etag.update('1' + '0' * 1023) etag = etag.hexdigest() metadata['ETag'] = etag with self.disk_file.create() as writer: writer.write(data) writer.put(metadata) auditor_worker.object_audit( AuditLocation(self.disk_file._datadir, 'sda', '0')) self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_no_meta(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + '.data') mkdirs(self.disk_file._datadir) fp = open(path, 'w') fp.write('0' * 1024) fp.close() invalidate_hash(os.path.dirname(self.disk_file._datadir)) auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) pre_quarantines = auditor_worker.quarantines auditor_worker.object_audit( AuditLocation(self.disk_file._datadir, 'sda', '0')) self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_will_not_swallow_errors_in_tests(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + '.data') mkdirs(self.disk_file._datadir) with open(path, 'w') as f: write_metadata(f, {'name': '/a/c/o'}) auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) def blowup(*args): raise NameError('tpyo') with mock.patch.object(DiskFileManager, 'get_diskfile_from_audit_location', blowup): self.assertRaises(NameError, auditor_worker.object_audit, AuditLocation(os.path.dirname(path), 'sda', '0')) def test_failsafe_object_audit_will_swallow_errors_in_tests(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + '.data') mkdirs(self.disk_file._datadir) with open(path, 'w') as f: write_metadata(f, {'name': '/a/c/o'}) auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) def blowup(*args): raise NameError('tpyo') with mock.patch('swift.obj.diskfile.DiskFile', blowup): auditor_worker.failsafe_object_audit( AuditLocation(os.path.dirname(path), 'sda', '0')) self.assertEquals(auditor_worker.errors, 1) def test_generic_exception_handling(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) # pretend that we logged (and reset counters) just now auditor_worker.last_logged = time.time() timestamp = str(normalize_timestamp(time.time())) pre_errors = auditor_worker.errors data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) with mock.patch('swift.obj.diskfile.DiskFile', lambda *_: 1 / 0): auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.errors, pre_errors + 1) def test_object_run_once_pass(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) auditor_worker.log_time = 0 timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines data = 
'0' * 1024 def write_file(df): etag = md5() with df.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) # policy 0 write_file(self.disk_file) # policy 1 write_file(self.disk_file_p1) auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.quarantines, pre_quarantines) # 1 object per policy falls into 1024 bucket self.assertEquals(auditor_worker.stats_buckets[1024], 2) self.assertEquals(auditor_worker.stats_buckets[10240], 0) # pick up some additional code coverage, large file data = '0' * 1024 * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb']) self.assertEquals(auditor_worker.quarantines, pre_quarantines) # still have the 1024 byte object left in policy-1 (plus the # stats from the original 2) self.assertEquals(auditor_worker.stats_buckets[1024], 3) self.assertEquals(auditor_worker.stats_buckets[10240], 0) # and then policy-0 disk_file was re-written as a larger object self.assertEquals(auditor_worker.stats_buckets['OVER'], 1) # pick up even more additional code coverage, misc paths auditor_worker.log_time = -1 auditor_worker.stats_sizes = [] auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb']) self.assertEquals(auditor_worker.quarantines, pre_quarantines) self.assertEquals(auditor_worker.stats_buckets[1024], 3) self.assertEquals(auditor_worker.stats_buckets[10240], 0) self.assertEquals(auditor_worker.stats_buckets['OVER'], 1) def test_object_run_logging(self): logger = FakeLogger() auditor_worker = auditor.AuditorWorker(self.conf, logger, self.rcache, self.devices) auditor_worker.audit_all_objects(device_dirs=['sda']) log_lines = logger.get_lines_for_level('info') self.assertTrue(len(log_lines) > 0) self.assertTrue(log_lines[0].index('ALL - parallel, sda')) logger = FakeLogger() auditor_worker = auditor.AuditorWorker(self.conf, logger, self.rcache, self.devices, zero_byte_only_at_fps=50) auditor_worker.audit_all_objects(device_dirs=['sda']) log_lines = logger.get_lines_for_level('info') self.assertTrue(len(log_lines) > 0) self.assertTrue(log_lines[0].index('ZBF - sda')) def test_object_run_once_no_sda(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines # pretend that we logged (and reset counters) just now auditor_worker.last_logged = time.time() data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) os.write(writer._fd, 'extra_data') auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_run_once_multi_devices(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) # pretend that we logged (and reset counters) just now auditor_worker.last_logged = time.time() timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines data = '0' * 10 etag = md5() with self.disk_file.create() as 
writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) auditor_worker.audit_all_objects() self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob') data = '1' * 10 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) os.write(writer._fd, 'extra_data') auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_run_fast_track_non_zero(self): self.auditor = auditor.ObjectAuditor(self.conf) self.auditor.log_time = 0 data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': str(normalize_timestamp(time.time())), 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) etag = md5() etag.update('1' + '0' * 1023) etag = etag.hexdigest() metadata['ETag'] = etag write_metadata(writer._fd, metadata) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') kwargs = {'mode': 'once'} kwargs['zero_byte_fps'] = 50 self.auditor.run_audit(**kwargs) self.assertFalse(os.path.isdir(quarantine_path)) del(kwargs['zero_byte_fps']) self.auditor.run_audit(**kwargs) self.assertTrue(os.path.isdir(quarantine_path)) def setup_bad_zero_byte(self, with_ts=False): self.auditor = auditor.ObjectAuditor(self.conf) self.auditor.log_time = 0 ts_file_path = '' if with_ts: name_hash = hash_path('a', 'c', 'o') dir_path = os.path.join( self.devices, 'sda', storage_directory(get_data_dir(0), '0', name_hash)) ts_file_path = os.path.join(dir_path, '99999.ts') if not os.path.exists(dir_path): mkdirs(dir_path) fp = open(ts_file_path, 'w') write_metadata(fp, {'X-Timestamp': '99999', 'name': '/a/c/o'}) fp.close() etag = md5() with self.disk_file.create() as writer: etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': str(normalize_timestamp(time.time())), 'Content-Length': 10, } writer.put(metadata) etag = md5() etag = etag.hexdigest() metadata['ETag'] = etag write_metadata(writer._fd, metadata) return ts_file_path def test_object_run_fast_track_all(self): self.setup_bad_zero_byte() kwargs = {'mode': 'once'} self.auditor.run_audit(**kwargs) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.assertTrue(os.path.isdir(quarantine_path)) def test_object_run_fast_track_zero(self): self.setup_bad_zero_byte() kwargs = {'mode': 'once'} kwargs['zero_byte_fps'] = 50 self.auditor.run_audit(**kwargs) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.assertTrue(os.path.isdir(quarantine_path)) def test_object_run_fast_track_zero_check_closed(self): rat = [False] class FakeFile(DiskFile): def _quarantine(self, data_file, msg): rat[0] = True DiskFile._quarantine(self, data_file, msg) self.setup_bad_zero_byte() was_df = auditor.diskfile.DiskFile try: auditor.diskfile.DiskFile = FakeFile kwargs = {'mode': 'once'} kwargs['zero_byte_fps'] = 50 self.auditor.run_audit(**kwargs) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.assertTrue(os.path.isdir(quarantine_path)) self.assertTrue(rat[0]) finally: auditor.diskfile.DiskFile = was_df def test_with_tombstone(self): ts_file_path = 
self.setup_bad_zero_byte(with_ts=True) self.assertTrue(ts_file_path.endswith('ts')) kwargs = {'mode': 'once'} self.auditor.run_audit(**kwargs) self.assertTrue(os.path.exists(ts_file_path)) def test_sleeper(self): with mock.patch( 'time.sleep', mock.MagicMock()) as mock_sleep: auditor.SLEEP_BETWEEN_AUDITS = 0.10 my_auditor = auditor.ObjectAuditor(self.conf) my_auditor._sleep() mock_sleep.assert_called_with(auditor.SLEEP_BETWEEN_AUDITS) def test_run_parallel_audit(self): class StopForever(Exception): pass class Bogus(Exception): pass class ObjectAuditorMock(object): check_args = () check_kwargs = {} check_device_dir = None fork_called = 0 master = 0 wait_called = 0 def mock_run(self, *args, **kwargs): self.check_args = args self.check_kwargs = kwargs if 'zero_byte_fps' in kwargs: self.check_device_dir = kwargs.get('device_dirs') def mock_sleep_stop(self): raise StopForever('stop') def mock_sleep_continue(self): return def mock_audit_loop_error(self, parent, zbo_fps, override_devices=None, **kwargs): raise Bogus('exception') def mock_fork(self): self.fork_called += 1 if self.master: return self.fork_called else: return 0 def mock_wait(self): self.wait_called += 1 return (self.wait_called, 0) for i in string.ascii_letters[2:26]: mkdirs(os.path.join(self.devices, 'sd%s' % i)) my_auditor = auditor.ObjectAuditor(dict(devices=self.devices, mount_check='false', zero_byte_files_per_second=89, concurrency=1)) mocker = ObjectAuditorMock() my_auditor.logger.exception = mock.MagicMock() real_audit_loop = my_auditor.audit_loop my_auditor.audit_loop = mocker.mock_audit_loop_error my_auditor.run_audit = mocker.mock_run was_fork = os.fork was_wait = os.wait os.fork = mocker.mock_fork os.wait = mocker.mock_wait try: my_auditor._sleep = mocker.mock_sleep_stop my_auditor.run_once(zero_byte_fps=50) my_auditor.logger.exception.assert_called_once_with( 'ERROR auditing: exception') my_auditor.logger.exception.reset_mock() self.assertRaises(StopForever, my_auditor.run_forever) my_auditor.logger.exception.assert_called_once_with( 'ERROR auditing: exception') my_auditor.audit_loop = real_audit_loop self.assertRaises(StopForever, my_auditor.run_forever, zero_byte_fps=50) self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 50) self.assertEquals(mocker.fork_called, 0) self.assertRaises(SystemExit, my_auditor.run_once) self.assertEquals(mocker.fork_called, 1) self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89) self.assertEquals(mocker.check_device_dir, []) self.assertEquals(mocker.check_args, ()) device_list = ['sd%s' % i for i in string.ascii_letters[2:10]] device_string = ','.join(device_list) device_string_bogus = device_string + ',bogus' mocker.fork_called = 0 self.assertRaises(SystemExit, my_auditor.run_once, devices=device_string_bogus) self.assertEquals(mocker.fork_called, 1) self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89) self.assertEquals(sorted(mocker.check_device_dir), device_list) mocker.master = 1 mocker.fork_called = 0 self.assertRaises(StopForever, my_auditor.run_forever) # Fork is called 2 times since the zbf process is forked just # once before self._sleep() is called and StopForever is raised # Also wait is called just once before StopForever is raised self.assertEquals(mocker.fork_called, 2) self.assertEquals(mocker.wait_called, 1) my_auditor._sleep = mocker.mock_sleep_continue my_auditor.concurrency = 2 mocker.fork_called = 0 mocker.wait_called = 0 my_auditor.run_once() # Fork is called no. of devices + (no. 
of devices)/2 + 1 times # since the zbf process is forked (no. of devices)/2 + 1 times no_devices = len(os.listdir(self.devices)) self.assertEquals(mocker.fork_called, no_devices + no_devices / 2 + 1) self.assertEquals(mocker.wait_called, no_devices + no_devices / 2 + 1) finally: os.fork = was_fork os.wait = was_wait
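
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the tests above): every quarantine
# assertion in TestAuditor hinges on the same invariant the auditor
# enforces -- the bytes on disk must still match the ETag and
# Content-Length recorded at PUT time. A minimal, hypothetical version of
# that consistency check (the real auditor streams the file in chunks and
# updates the hash incrementally rather than hashing one buffer):
#
# from hashlib import md5
#
# def data_consistent_with_metadata(data, metadata):
#     """True if data still matches the recorded ETag/Content-Length."""
#     if len(data) != int(metadata['Content-Length']):
#         # e.g. test_object_audit_extra_data appends bytes past the
#         # recorded length, which must trigger a quarantine
#         return False
#     return md5(data).hexdigest() == metadata['ETag']
# ---------------------------------------------------------------------------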
class TestAuditor(unittest.TestCase): def setUp(self): self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor') self.devices = os.path.join(self.testdir, 'node') self.rcache = os.path.join(self.testdir, 'object.recon') self.logger = FakeLogger() rmtree(self.testdir, ignore_errors=1) mkdirs(os.path.join(self.devices, 'sda')) os.mkdir(os.path.join(self.devices, 'sdb')) # policy 0 self.objects = os.path.join(self.devices, 'sda', get_data_dir(POLICIES[0])) self.objects_2 = os.path.join(self.devices, 'sdb', get_data_dir(POLICIES[0])) os.mkdir(self.objects) # policy 1 self.objects_p1 = os.path.join(self.devices, 'sda', get_data_dir(POLICIES[1])) self.objects_2_p1 = os.path.join(self.devices, 'sdb', get_data_dir(POLICIES[1])) os.mkdir(self.objects_p1) self.parts = self.parts_p1 = {} for part in ['0', '1', '2', '3']: self.parts[part] = os.path.join(self.objects, part) self.parts_p1[part] = os.path.join(self.objects_p1, part) os.mkdir(os.path.join(self.objects, part)) os.mkdir(os.path.join(self.objects_p1, part)) self.conf = dict( devices=self.devices, mount_check='false', object_size_stats='10,100,1024,10240') self.df_mgr = DiskFileManager(self.conf, self.logger) # diskfiles for policy 0, 1 self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', policy=POLICIES[0]) self.disk_file_p1 = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', policy=POLICIES[1]) def tearDown(self): rmtree(os.path.dirname(self.testdir), ignore_errors=1) unit.xattr_data = {} def test_worker_conf_parms(self): def check_common_defaults(): self.assertEqual(auditor_worker.max_bytes_per_second, 10000000) self.assertEqual(auditor_worker.log_time, 3600) # test default values conf = dict( devices=self.devices, mount_check='false', object_size_stats='10,100,1024,10240') auditor_worker = auditor.AuditorWorker(conf, self.logger, self.rcache, self.devices) check_common_defaults() self.assertEqual(auditor_worker.diskfile_mgr.disk_chunk_size, 65536) self.assertEqual(auditor_worker.max_files_per_second, 20) self.assertEqual(auditor_worker.zero_byte_only_at_fps, 0) # test specified audit value overrides conf.update({'disk_chunk_size': 4096}) auditor_worker = auditor.AuditorWorker(conf, self.logger, self.rcache, self.devices, zero_byte_only_at_fps=50) check_common_defaults() self.assertEqual(auditor_worker.diskfile_mgr.disk_chunk_size, 4096) self.assertEqual(auditor_worker.max_files_per_second, 50) self.assertEqual(auditor_worker.zero_byte_only_at_fps, 50) def test_object_audit_extra_data(self): def run_tests(disk_file): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) data = '0' * 1024 etag = md5() with disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() timestamp = str(normalize_timestamp(time.time())) metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) pre_quarantines = auditor_worker.quarantines auditor_worker.object_audit( AuditLocation(disk_file._datadir, 'sda', '0', policy=POLICIES.legacy)) self.assertEqual(auditor_worker.quarantines, pre_quarantines) os.write(writer._fd, 'extra_data') auditor_worker.object_audit( AuditLocation(disk_file._datadir, 'sda', '0', policy=POLICIES.legacy)) self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1) run_tests(self.disk_file) run_tests(self.disk_file_p1) def test_object_audit_diff_data(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) data = '0' * 1024 etag = 
md5() timestamp = str(normalize_timestamp(time.time())) with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) pre_quarantines = auditor_worker.quarantines # remake so it will have metadata self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', policy=POLICIES.legacy) auditor_worker.object_audit( AuditLocation(self.disk_file._datadir, 'sda', '0', policy=POLICIES.legacy)) self.assertEqual(auditor_worker.quarantines, pre_quarantines) etag = md5() etag.update('1' + '0' * 1023) etag = etag.hexdigest() metadata['ETag'] = etag with self.disk_file.create() as writer: writer.write(data) writer.put(metadata) auditor_worker.object_audit( AuditLocation(self.disk_file._datadir, 'sda', '0', policy=POLICIES.legacy)) self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_no_meta(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + '.data') mkdirs(self.disk_file._datadir) fp = open(path, 'w') fp.write('0' * 1024) fp.close() invalidate_hash(os.path.dirname(self.disk_file._datadir)) auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) pre_quarantines = auditor_worker.quarantines auditor_worker.object_audit( AuditLocation(self.disk_file._datadir, 'sda', '0', policy=POLICIES.legacy)) self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_will_not_swallow_errors_in_tests(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + '.data') mkdirs(self.disk_file._datadir) with open(path, 'w') as f: write_metadata(f, {'name': '/a/c/o'}) auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) def blowup(*args): raise NameError('tpyo') with mock.patch.object(DiskFileManager, 'get_diskfile_from_audit_location', blowup): self.assertRaises(NameError, auditor_worker.object_audit, AuditLocation(os.path.dirname(path), 'sda', '0', policy=POLICIES.legacy)) def test_failsafe_object_audit_will_swallow_errors_in_tests(self): timestamp = str(normalize_timestamp(time.time())) path = os.path.join(self.disk_file._datadir, timestamp + '.data') mkdirs(self.disk_file._datadir) with open(path, 'w') as f: write_metadata(f, {'name': '/a/c/o'}) auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) def blowup(*args): raise NameError('tpyo') with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls', blowup): auditor_worker.failsafe_object_audit( AuditLocation(os.path.dirname(path), 'sda', '0', policy=POLICIES.legacy)) self.assertEqual(auditor_worker.errors, 1) def test_generic_exception_handling(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) # pretend that we logged (and reset counters) just now auditor_worker.last_logged = time.time() timestamp = str(normalize_timestamp(time.time())) pre_errors = auditor_worker.errors data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls', lambda *_: 1 / 0): auditor_worker.audit_all_objects() 
self.assertEqual(auditor_worker.errors, pre_errors + 1) def test_object_run_once_pass(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) auditor_worker.log_time = 0 timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines data = '0' * 1024 def write_file(df): etag = md5() with df.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) # policy 0 write_file(self.disk_file) # policy 1 write_file(self.disk_file_p1) auditor_worker.audit_all_objects() self.assertEqual(auditor_worker.quarantines, pre_quarantines) # 1 object per policy falls into 1024 bucket self.assertEqual(auditor_worker.stats_buckets[1024], 2) self.assertEqual(auditor_worker.stats_buckets[10240], 0) # pick up some additional code coverage, large file data = '0' * 1024 * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb']) self.assertEqual(auditor_worker.quarantines, pre_quarantines) # still have the 1024 byte object left in policy-1 (plus the # stats from the original 2) self.assertEqual(auditor_worker.stats_buckets[1024], 3) self.assertEqual(auditor_worker.stats_buckets[10240], 0) # and then policy-0 disk_file was re-written as a larger object self.assertEqual(auditor_worker.stats_buckets['OVER'], 1) # pick up even more additional code coverage, misc paths auditor_worker.log_time = -1 auditor_worker.stats_sizes = [] auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb']) self.assertEqual(auditor_worker.quarantines, pre_quarantines) self.assertEqual(auditor_worker.stats_buckets[1024], 3) self.assertEqual(auditor_worker.stats_buckets[10240], 0) self.assertEqual(auditor_worker.stats_buckets['OVER'], 1) def test_object_run_logging(self): logger = FakeLogger() auditor_worker = auditor.AuditorWorker(self.conf, logger, self.rcache, self.devices) auditor_worker.audit_all_objects(device_dirs=['sda']) log_lines = logger.get_lines_for_level('info') self.assertTrue(len(log_lines) > 0) self.assertTrue(log_lines[0].index('ALL - parallel, sda')) logger = FakeLogger() auditor_worker = auditor.AuditorWorker(self.conf, logger, self.rcache, self.devices, zero_byte_only_at_fps=50) auditor_worker.audit_all_objects(device_dirs=['sda']) log_lines = logger.get_lines_for_level('info') self.assertTrue(len(log_lines) > 0) self.assertTrue(log_lines[0].index('ZBF - sda')) def test_object_run_once_no_sda(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines # pretend that we logged (and reset counters) just now auditor_worker.last_logged = time.time() data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) os.write(writer._fd, 'extra_data') auditor_worker.audit_all_objects() self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1) def test_object_run_once_multi_devices(self): auditor_worker = 
auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) # pretend that we logged (and reset counters) just now auditor_worker.last_logged = time.time() timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines data = '0' * 10 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) auditor_worker.audit_all_objects() self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob', policy=POLICIES.legacy) data = '1' * 10 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) os.write(writer._fd, 'extra_data') auditor_worker.audit_all_objects() self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1) def test_object_run_fast_track_non_zero(self): self.auditor = auditor.ObjectAuditor(self.conf) self.auditor.log_time = 0 data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': str(normalize_timestamp(time.time())), 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) etag = md5() etag.update('1' + '0' * 1023) etag = etag.hexdigest() metadata['ETag'] = etag write_metadata(writer._fd, metadata) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') kwargs = {'mode': 'once'} kwargs['zero_byte_fps'] = 50 self.auditor.run_audit(**kwargs) self.assertFalse(os.path.isdir(quarantine_path)) del(kwargs['zero_byte_fps']) self.auditor.run_audit(**kwargs) self.assertTrue(os.path.isdir(quarantine_path)) def setup_bad_zero_byte(self, timestamp=None): if timestamp is None: timestamp = Timestamp(time.time()) self.auditor = auditor.ObjectAuditor(self.conf) self.auditor.log_time = 0 etag = md5() with self.disk_file.create() as writer: etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': timestamp.internal, 'Content-Length': 10, } writer.put(metadata) etag = md5() etag = etag.hexdigest() metadata['ETag'] = etag write_metadata(writer._fd, metadata) def test_object_run_fast_track_all(self): self.setup_bad_zero_byte() kwargs = {'mode': 'once'} self.auditor.run_audit(**kwargs) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.assertTrue(os.path.isdir(quarantine_path)) def test_object_run_fast_track_zero(self): self.setup_bad_zero_byte() kwargs = {'mode': 'once'} kwargs['zero_byte_fps'] = 50 self.auditor.run_audit(**kwargs) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.assertTrue(os.path.isdir(quarantine_path)) def test_object_run_fast_track_zero_check_closed(self): rat = [False] class FakeFile(DiskFile): def _quarantine(self, data_file, msg): rat[0] = True DiskFile._quarantine(self, data_file, msg) self.setup_bad_zero_byte() with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls', FakeFile): kwargs = {'mode': 'once'} kwargs['zero_byte_fps'] = 50 self.auditor.run_audit(**kwargs) quarantine_path = os.path.join(self.devices, 'sda', 'quarantined', 'objects') self.assertTrue(os.path.isdir(quarantine_path)) self.assertTrue(rat[0]) @mock.patch.object(auditor.ObjectAuditor, 'run_audit') @mock.patch('os.fork', return_value=0) def 
test_with_inaccessible_object_location(self, mock_os_fork, mock_run_audit): # Need to ensure that any failures in run_audit do # not prevent sys.exit() from running. Otherwise we get # zombie processes. e = OSError('permission denied') mock_run_audit.side_effect = e self.auditor = auditor.ObjectAuditor(self.conf) self.assertRaises(SystemExit, self.auditor.fork_child, self) def test_with_only_tombstone(self): # sanity check that auditor doesn't touch solitary tombstones ts_iter = make_timestamp_iter() self.setup_bad_zero_byte(timestamp=ts_iter.next()) self.disk_file.delete(ts_iter.next()) files = os.listdir(self.disk_file._datadir) self.assertEqual(1, len(files)) self.assertTrue(files[0].endswith('ts')) kwargs = {'mode': 'once'} self.auditor.run_audit(**kwargs) files_after = os.listdir(self.disk_file._datadir) self.assertEqual(files, files_after) def test_with_tombstone_and_data(self): # rsync replication could leave a tombstone and data file in object # dir - verify they are both removed during audit ts_iter = make_timestamp_iter() ts_tomb = ts_iter.next() ts_data = ts_iter.next() self.setup_bad_zero_byte(timestamp=ts_data) tomb_file_path = os.path.join(self.disk_file._datadir, '%s.ts' % ts_tomb.internal) with open(tomb_file_path, 'wb') as fd: write_metadata(fd, {'X-Timestamp': ts_tomb.internal}) files = os.listdir(self.disk_file._datadir) self.assertEqual(2, len(files)) self.assertTrue(os.path.basename(tomb_file_path) in files, files) kwargs = {'mode': 'once'} self.auditor.run_audit(**kwargs) self.assertFalse(os.path.exists(self.disk_file._datadir)) def test_sleeper(self): with mock.patch( 'time.sleep', mock.MagicMock()) as mock_sleep: auditor.SLEEP_BETWEEN_AUDITS = 0.10 my_auditor = auditor.ObjectAuditor(self.conf) my_auditor._sleep() mock_sleep.assert_called_with(auditor.SLEEP_BETWEEN_AUDITS) def test_run_parallel_audit(self): class StopForever(Exception): pass class Bogus(Exception): pass class ObjectAuditorMock(object): check_args = () check_kwargs = {} check_device_dir = None fork_called = 0 master = 0 wait_called = 0 def mock_run(self, *args, **kwargs): self.check_args = args self.check_kwargs = kwargs if 'zero_byte_fps' in kwargs: self.check_device_dir = kwargs.get('device_dirs') def mock_sleep_stop(self): raise StopForever('stop') def mock_sleep_continue(self): return def mock_audit_loop_error(self, parent, zbo_fps, override_devices=None, **kwargs): raise Bogus('exception') def mock_fork(self): self.fork_called += 1 if self.master: return self.fork_called else: return 0 def mock_wait(self): self.wait_called += 1 return (self.wait_called, 0) for i in string.ascii_letters[2:26]: mkdirs(os.path.join(self.devices, 'sd%s' % i)) my_auditor = auditor.ObjectAuditor(dict(devices=self.devices, mount_check='false', zero_byte_files_per_second=89, concurrency=1)) mocker = ObjectAuditorMock() my_auditor.logger.exception = mock.MagicMock() real_audit_loop = my_auditor.audit_loop my_auditor.audit_loop = mocker.mock_audit_loop_error my_auditor.run_audit = mocker.mock_run was_fork = os.fork was_wait = os.wait os.fork = mocker.mock_fork os.wait = mocker.mock_wait try: my_auditor._sleep = mocker.mock_sleep_stop my_auditor.run_once(zero_byte_fps=50) my_auditor.logger.exception.assert_called_once_with( 'ERROR auditing: exception') my_auditor.logger.exception.reset_mock() self.assertRaises(StopForever, my_auditor.run_forever) my_auditor.logger.exception.assert_called_once_with( 'ERROR auditing: exception') my_auditor.audit_loop = real_audit_loop self.assertRaises(StopForever, my_auditor.run_forever, 
zero_byte_fps=50) self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 50) self.assertEqual(mocker.fork_called, 0) self.assertRaises(SystemExit, my_auditor.run_once) self.assertEqual(mocker.fork_called, 1) self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89) self.assertEqual(mocker.check_device_dir, []) self.assertEqual(mocker.check_args, ()) device_list = ['sd%s' % i for i in string.ascii_letters[2:10]] device_string = ','.join(device_list) device_string_bogus = device_string + ',bogus' mocker.fork_called = 0 self.assertRaises(SystemExit, my_auditor.run_once, devices=device_string_bogus) self.assertEqual(mocker.fork_called, 1) self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89) self.assertEqual(sorted(mocker.check_device_dir), device_list) mocker.master = 1 mocker.fork_called = 0 self.assertRaises(StopForever, my_auditor.run_forever) # Fork is called 2 times since the zbf process is forked just # once before self._sleep() is called and StopForever is raised # Also wait is called just once before StopForever is raised self.assertEqual(mocker.fork_called, 2) self.assertEqual(mocker.wait_called, 1) my_auditor._sleep = mocker.mock_sleep_continue my_auditor.concurrency = 2 mocker.fork_called = 0 mocker.wait_called = 0 my_auditor.run_once() # Fork is called no. of devices + (no. of devices)/2 + 1 times # since the zbf process is forked (no. of devices)/2 + 1 times no_devices = len(os.listdir(self.devices)) self.assertEqual(mocker.fork_called, no_devices + no_devices / 2 + 1) self.assertEqual(mocker.wait_called, no_devices + no_devices / 2 + 1) finally: os.fork = was_fork os.wait = was_wait
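
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the tests above): the final fork-count
# assertions in test_run_parallel_audit encode the arithmetic below. Per the
# in-test comment, with concurrency=2 one worker is forked per device, and
# the ZBF process is forked (no. of devices)/2 + 1 times. The helper name is
# hypothetical, and the batching interpretation is inferred from the test's
# own accounting rather than from the auditor source.
#
# def expected_fork_calls(num_devices, concurrency=2):
#     # one fork per device for the regular audit workers, plus one ZBF
#     # fork per batch of `concurrency` devices, plus one more ZBF fork
#     zbf_forks = num_devices // concurrency + 1
#     return num_devices + zbf_forks
#
# expected_fork_calls(24)  # -> 24 + 12 + 1 == 37, matching the assertion
# ---------------------------------------------------------------------------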