def __init__(self, conf, logger=None):
    """
    Populate the replicator's settings from ``conf``.

    Every option is read with ``conf.get`` and falls back to the default
    spelled out below when it is absent.

    :param conf: configuration object obtained from ConfigParser
    :param logger: logging object; when falsy a logger is obtained from
                   get_logger() using the 'object-replicator' log route
    """
    self.conf = conf
    base_logger = logger or get_logger(conf, log_route='object-replicator')
    self.logger = PrefixLoggerAdapter(base_logger, {})

    # Where the data lives and how this node is addressed.
    self.devices_dir = conf.get('devices', '/srv/node')
    self.mount_check = config_true_value(conf.get('mount_check', 'true'))
    self.swift_dir = conf.get('swift_dir', '/etc/swift')
    self.bind_ip = conf.get('bind_ip', '0.0.0.0')
    self.servers_per_port = int(conf.get('servers_per_port', '0') or 0)
    if self.servers_per_port:
        # bind_port is not consulted at all in servers-per-port mode.
        self.port = None
    else:
        self.port = int(conf.get('bind_port', 6200))

    # Scheduling and bookkeeping.
    self.concurrency = int(conf.get('concurrency', 1))
    self.replicator_workers = int(conf.get('replicator_workers', 0))
    self.stats_interval = int(conf.get('stats_interval', '300'))
    self.ring_check_interval = int(conf.get('ring_check_interval', 15))
    self.next_check = time.time() + self.ring_check_interval
    self.replication_cycle = random.randint(0, 9)
    self.partition_times = []
    # 'interval' wins over the older 'run_pause'; empty values fall through.
    configured_interval = conf.get('interval') or conf.get('run_pause')
    self.interval = int(configured_interval or 30)

    # rsync tuning.
    self.rsync_timeout = int(
        conf.get('rsync_timeout', DEFAULT_RSYNC_TIMEOUT))
    self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
    self.rsync_bwlimit = conf.get('rsync_bwlimit', '0')
    self.rsync_compress = config_true_value(
        conf.get('rsync_compress', 'no'))
    # An empty module (after stripping trailing slashes) means "use the
    # default template".
    self.rsync_module = (conf.get('rsync_module', '').rstrip('/') or
                         '{replication_ip}::object')

    # HTTP / recon settings.
    self.http_timeout = int(conf.get('http_timeout', 60))
    self.recon_cache_path = conf.get(
        'recon_cache_path', '/var/cache/swift')
    self.rcache = os.path.join(self.recon_cache_path, "object.recon")
    self._next_rcache_update = time.time() + self.stats_interval
    self.conn_timeout = float(conf.get('conn_timeout', 0.5))
    self.node_timeout = float(conf.get('node_timeout', 10))
    # sync_method names an attribute of this object; 'rsync' when unset
    # or empty.
    sync_method_name = conf.get('sync_method') or 'rsync'
    self.sync_method = getattr(self, sync_method_name)
    self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
    self.default_headers = {
        'Content-Length': '0',
        'user-agent': 'object-replicator %s' % os.getpid(),
    }
    self.rsync_error_log_line_length = int(
        conf.get('rsync_error_log_line_length', 0))

    # Handoff handling; either option being truthy triggers the warning.
    self.handoffs_first = config_true_value(
        conf.get('handoffs_first', False))
    self.handoff_delete = config_auto_int_value(
        conf.get('handoff_delete', 'auto'), 0)
    if self.handoff_delete or self.handoffs_first:
        self.logger.warning('Handoff only mode is not intended for normal '
                            'operation, please disable handoffs_first and '
                            'handoff_delete before the next '
                            'normal rebalance')

    self.is_multiprocess_worker = None
    self._df_router = DiskFileRouter(conf, self.logger)
    self._child_process_reaper_queue = queue.LightQueue()
def sync_device(args, device):
    """
    Sync all partitions of ``device`` using a pool of worker threads.

    A consumer thread drains ``finished_q`` while ``args.threads_per_dev``
    worker threads run ``sync_parts`` against ``feeder_q``.  Every item
    yielded by ``iter_parts(args, device)`` is put on ``feeder_q``.

    Shutdown always happens in a ``finally`` clause: one ``None`` per started
    worker is queued on ``feeder_q``, the workers are joined, then a single
    ``None`` is queued on ``finished_q`` and the consumer is joined.  Doing
    all of this in ``finally`` means a failure while starting workers or
    while iterating partitions cannot leave the non-daemon consumer thread
    blocked on its queue.
    (Presumably ``sync_parts`` and ``consume`` treat ``None`` as a stop
    signal -- confirm against their definitions.)

    :param args: parsed options; reads policy_index, devices, bind_ip, port
                 and threads_per_dev here
    :param device: device handed through to ``iter_parts``
    """
    logging.info('worker started!')
    policy = POLICIES[args.policy_index]
    conf = {
        'devices': args.devices,
        'mount_check': False,
    }
    df_mgr = DiskFileRouter(conf, logging)[policy]

    logging.debug('connecting')
    channel = grpc.insecure_channel('{}:{}'.format(args.bind_ip, args.port))
    stub = pb2_grpc.SyncStub(channel)
    logging.debug('connected')

    queue_depth = args.threads_per_dev * 10
    feeder_q = Queue(queue_depth)
    finished_q = Queue(queue_depth)

    consumer = Thread(target=consume, args=(finished_q, ))
    consumer.start()

    workers = []
    try:
        for _ in range(args.threads_per_dev):
            t = Thread(target=sync_parts, args=(args, stub, df_mgr,
                                                feeder_q, finished_q))
            t.start()
            # Only threads that actually started are tracked, so teardown
            # never joins (or queues a sentinel for) an unstarted thread.
            workers.append(t)
        logging.debug('feeding queue')
        for part_info in iter_parts(args, device):
            logging.debug('doing part_info %r', part_info)
            feeder_q.put(part_info)
    finally:
        # One stop sentinel per started worker.
        for _ in workers:
            feeder_q.put(None)
        for t in workers:
            t.join()
        # Workers are done producing results; now stop the consumer.
        finished_q.put(None)
        consumer.join()
def __init__(self, root):
    """
    Create the servicer with a diskfile router rooted at ``root``.

    :param root: devices directory; made absolute before use.  Mount
                 checking is disabled for the router.
    """
    router_conf = {
        'devices': os.path.abspath(root),
        'mount_check': False,
    }
    self.df_router = DiskFileRouter(router_conf, get_logger({}))
    super(SyncServicer, self).__init__()
def filler(args, q):
    """
    Create objects on disk for every spec pulled from ``q``.

    Runs until a ``None`` is read from the queue.  Each spec is a mapping
    with 'device', 'part', 'account', 'container', 'obj' and 'policy_index'
    keys; the object body is ``args.size`` bytes taken from ``BUFFER``.

    :param args: parsed options; reads verbose, policy_index, devices, user
                 and size
    :param q: queue of object specs, terminated by ``None``
    """
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO)

    policy = POLICIES[args.policy_index]
    df_mgr = DiskFileRouter({
        'devices': args.devices,
        'mount_check': False,
    }, logging)[policy]

    if args.user:
        drop_privileges(args.user)

    while True:
        spec = q.get()
        if spec is None:
            # Sentinel: no more work.
            break

        diskfile = df_mgr.get_diskfile(
            spec['device'],
            spec['part'],
            spec['account'],
            spec['container'],
            spec['obj'],
            POLICIES[spec['policy_index']],
        )
        with diskfile.create() as writer:
            timestamp = Timestamp(time.time())
            etag = hashlib.md5()
            bytes_left = args.size
            # Write BUFFER-sized pieces until args.size bytes are written;
            # the last slice is shortened to whatever is left.
            while bytes_left > 0:
                piece = BUFFER[:bytes_left]
                writer.write(piece)
                etag.update(piece)
                bytes_left -= len(piece)
            writer.put({
                'ETag': etag.hexdigest(),
                'X-Timestamp': timestamp.internal,
                'Content-Length': str(args.size),
            })
            writer.commit(timestamp)
        logging.debug(
            'created '
            '/%(device)s/%(part)s/%(account)s/%(container)s/%(obj)s',
            spec)
def __init__(self, conf, logger=None):
    """
    Populate the reconstructor's settings from ``conf``.

    :param conf: configuration object obtained from ConfigParser
    :param logger: logging object; when falsy a logger is obtained from
                   get_logger() using the 'object-reconstructor' log route
    """
    self.conf = conf
    if logger:
        self.logger = logger
    else:
        self.logger = get_logger(conf, log_route='object-reconstructor')

    # Where the data lives and how this node is addressed.
    self.devices_dir = conf.get('devices', '/srv/node')
    self.mount_check = config_true_value(conf.get('mount_check', 'true'))
    self.swift_dir = conf.get('swift_dir', '/etc/swift')
    self.bind_ip = conf.get('bind_ip', '0.0.0.0')
    self.servers_per_port = int(conf.get('servers_per_port', '0') or 0)
    if self.servers_per_port:
        # bind_port is not consulted at all in servers-per-port mode.
        self.port = None
    else:
        self.port = int(conf.get('bind_port', 6200))

    # Scheduling and bookkeeping.
    self.concurrency = int(conf.get('concurrency', 1))
    self.stats_interval = int(conf.get('stats_interval', '300'))
    self.ring_check_interval = int(conf.get('ring_check_interval', 15))
    self.next_check = time.time() + self.ring_check_interval
    self.partition_times = []
    # 'interval' wins over the older 'run_pause'; empty values fall through.
    configured_interval = conf.get('interval') or conf.get('run_pause')
    self.interval = int(configured_interval or 30)
    self.http_timeout = int(conf.get('http_timeout', 60))
    self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
    self.recon_cache_path = conf.get(
        'recon_cache_path', '/var/cache/swift')
    self.rcache = os.path.join(self.recon_cache_path, "object.recon")

    # defaults subject to change after beta
    self.conn_timeout = float(conf.get('conn_timeout', 0.5))
    self.node_timeout = float(conf.get('node_timeout', 10))
    self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
    self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
    self.headers = {
        'Content-Length': '0',
        'user-agent': 'obj-reconstructor %s' % os.getpid(),
    }

    # handoffs_first is deprecated; when present it only supplies the
    # default for handoffs_only (honored for backwards compatibility).
    default_handoffs_only = False
    if 'handoffs_first' in conf:
        self.logger.warning(
            'The handoffs_first option is deprecated in favor '
            'of handoffs_only. This option may be ignored in a '
            'future release.')
        default_handoffs_only = config_true_value(conf['handoffs_first'])
    self.handoffs_only = config_true_value(
        conf.get('handoffs_only', default_handoffs_only))
    if self.handoffs_only:
        self.logger.warning('Handoff only mode is not intended for normal '
                            'operation, use handoffs_only with care.')
    elif default_handoffs_only:
        # handoffs_first asked for it, but an explicit handoffs_only
        # turned it off.
        self.logger.warning('Ignored handoffs_first option in favor '
                            'of handoffs_only.')

    self._df_router = DiskFileRouter(conf, self.logger)
def __init__(self, conf, logger=None):
    """
    Populate the reconstructor's settings from ``conf``.

    :param conf: configuration object obtained from ConfigParser
    :param logger: logging object; when falsy a logger is obtained from
                   get_logger() using the 'object-reconstructor' log route
    """
    self.conf = conf
    if logger:
        self.logger = logger
    else:
        self.logger = get_logger(conf, log_route='object-reconstructor')

    # Where the data lives and how this node is addressed.
    self.devices_dir = conf.get('devices', '/srv/node')
    self.mount_check = config_true_value(conf.get('mount_check', 'true'))
    self.swift_dir = conf.get('swift_dir', '/etc/swift')
    self.bind_ip = conf.get('bind_ip', '0.0.0.0')
    self.servers_per_port = int(conf.get('servers_per_port', '0') or 0)
    if self.servers_per_port:
        # bind_port is not consulted at all in servers-per-port mode.
        self.port = None
    else:
        self.port = int(conf.get('bind_port', 6000))

    # Scheduling and bookkeeping.
    self.concurrency = int(conf.get('concurrency', 1))
    self.stats_interval = int(conf.get('stats_interval', '300'))
    self.ring_check_interval = int(conf.get('ring_check_interval', 15))
    self.next_check = time.time() + self.ring_check_interval
    # Default reclaim age is seven days, expressed in seconds.
    self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))
    self.partition_times = []
    # 'interval' wins over the older 'run_pause'; empty values fall through.
    configured_interval = conf.get('interval') or conf.get('run_pause')
    self.interval = int(configured_interval or 30)
    self.http_timeout = int(conf.get('http_timeout', 60))
    self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
    self.recon_cache_path = conf.get(
        'recon_cache_path', '/var/cache/swift')
    self.rcache = os.path.join(self.recon_cache_path, "object.recon")

    # defaults subject to change after beta
    self.conn_timeout = float(conf.get('conn_timeout', 0.5))
    self.node_timeout = float(conf.get('node_timeout', 10))
    self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
    self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
    self.headers = {
        'Content-Length': '0',
        'user-agent': 'obj-reconstructor %s' % os.getpid(),
    }
    self.handoffs_first = config_true_value(
        conf.get('handoffs_first', False))

    self._df_router = DiskFileRouter(conf, self.logger)
def setup(self, conf):
    """
    Implementation specific setup.

    Called at the very end by the constructor so that a specific
    implementation can adjust existing attributes or add its own.

    :param conf: WSGI configuration parameter
    """
    # Common on-disk hierarchy shared across account, container and object
    # servers.
    self._diskfile_router = DiskFileRouter(conf, self.logger)

    # The semaphore is shared by all workers and is populated by
    # global_conf_callback; it arrives wrapped in a list so it could get
    # past paste, hence the [0].
    self.replication_semaphore = (
        conf['replication_semaphore'][0]
        if 'replication_semaphore' in conf else None)

    # Missing or empty values fall back to the defaults.
    failure_threshold = conf.get('replication_failure_threshold') or 100
    failure_ratio = conf.get('replication_failure_ratio') or 1.0
    self.replication_failure_threshold = int(failure_threshold)
    self.replication_failure_ratio = float(failure_ratio)
def test_state_file(self):
    # Walks the relinker state file ('relink.objects.json' in the device
    # directory) through a relink step, a cleanup step, a part-power
    # mismatch and a corrupted file, checking both the in-memory ``states``
    # dict and the JSON written to disk after each hook call.
    device_path = os.path.join(self.devices, self.existing_device)
    datadir = 'objects'
    datadir_path = os.path.join(device_path, datadir)
    state_file = os.path.join(device_path, 'relink.%s.json' % datadir)

    def call_partition_filter(part_power, next_part_power, parts):
        # Partition 312 will be ignored because it must have been created
        # by the relinker
        # NOTE: ``states`` is looked up from the enclosing scope at call
        # time, so each call sees whichever dict is currently bound below.
        return relinker.partitions_filter(states,
                                          part_power, next_part_power,
                                          datadir_path, parts)

    # Start relinking
    states = {
        "part_power": PART_POWER,
        "next_part_power": PART_POWER + 1,
        "state": {}
    }

    # Load the states: As it starts, it must be empty
    locks = [None]
    relinker.hook_pre_device(locks, states, datadir, device_path)
    self.assertEqual({}, states["state"])
    os.close(locks[0])  # Release the lock

    # Partition 312 is ignored because it must have been created with the
    # next_part_power, so it does not need to be relinked
    # 96 and 227 are reverse ordered
    # auditor_status.json is ignored because it's not a partition
    self.assertEqual(
        ['227', '96'],
        call_partition_filter(PART_POWER, PART_POWER + 1,
                              ['96', '227', '312', 'auditor_status.json']))
    self.assertEqual(states["state"], {'96': False, '227': False})

    pol = POLICIES[0]
    mgr = DiskFileRouter({
        'devices': self.devices,
        'mount_check': False
    }, self.logger)[pol]

    # Ack partition 96
    relinker.hook_post_partition(states, relinker.STEP_RELINK, pol, mgr,
                                 os.path.join(datadir_path, '96'))
    self.assertEqual(states["state"], {'96': True, '227': False})
    # The on-disk state file must mirror the in-memory state
    with open(state_file, 'rt') as f:
        self.assertEqual(
            json.load(f), {
                "part_power": PART_POWER,
                "next_part_power": PART_POWER + 1,
                "state": {
                    '96': True,
                    '227': False
                }
            })

    # Restart relinking after only part 96 was done
    self.assertEqual(['227'],
                     call_partition_filter(PART_POWER, PART_POWER + 1,
                                           ['96', '227', '312']))
    self.assertEqual(states["state"], {'96': True, '227': False})

    # Ack partition 227
    relinker.hook_post_partition(states, relinker.STEP_RELINK, pol, mgr,
                                 os.path.join(datadir_path, '227'))
    self.assertEqual(states["state"], {'96': True, '227': True})
    with open(state_file, 'rt') as f:
        self.assertEqual(
            json.load(f), {
                "part_power": PART_POWER,
                "next_part_power": PART_POWER + 1,
                "state": {
                    '96': True,
                    '227': True
                }
            })

    # If the process restarts, it reloads the state
    locks = [None]
    states = {
        "part_power": PART_POWER,
        "next_part_power": PART_POWER + 1,
        "state": {},
    }
    relinker.hook_pre_device(locks, states, datadir, device_path)
    self.assertEqual(
        states, {
            "part_power": PART_POWER,
            "next_part_power": PART_POWER + 1,
            "state": {
                '96': True,
                '227': True
            }
        })
    os.close(locks[0])  # Release the lock

    # Start cleanup -- note that part_power and next_part_power now match!
    states = {
        "part_power": PART_POWER + 1,
        "next_part_power": PART_POWER + 1,
        "state": {},
    }
    # ...which means our state file was ignored
    # NOTE: ``locks`` is reused here without being reset to [None]; the
    # descriptor it held was already closed above.
    relinker.hook_pre_device(locks, states, datadir, device_path)
    self.assertEqual(
        states, {
            "prev_part_power": PART_POWER,
            "part_power": PART_POWER + 1,
            "next_part_power": PART_POWER + 1,
            "state": {}
        })
    os.close(locks[0])  # Release the lock

    self.assertEqual(['227', '96'],
                     call_partition_filter(PART_POWER + 1, PART_POWER + 1,
                                           ['96', '227', '312']))
    # Ack partition 227
    relinker.hook_post_partition(states, relinker.STEP_CLEANUP, pol, mgr,
                                 os.path.join(datadir_path, '227'))
    self.assertEqual(states["state"], {'96': False, '227': True})
    with open(state_file, 'rt') as f:
        self.assertEqual(
            json.load(f), {
                "prev_part_power": PART_POWER,
                "part_power": PART_POWER + 1,
                "next_part_power": PART_POWER + 1,
                "state": {
                    '96': False,
                    '227': True
                }
            })

    # Restart cleanup after only part 227 was done
    self.assertEqual(['96'],
                     call_partition_filter(PART_POWER + 1, PART_POWER + 1,
                                           ['96', '227', '312']))
    self.assertEqual(states["state"], {'96': False, '227': True})

    # Ack partition 96
    relinker.hook_post_partition(states, relinker.STEP_CLEANUP, pol, mgr,
                                 os.path.join(datadir_path, '96'))
    self.assertEqual(states["state"], {'96': True, '227': True})
    with open(state_file, 'rt') as f:
        self.assertEqual(
            json.load(f), {
                "prev_part_power": PART_POWER,
                "part_power": PART_POWER + 1,
                "next_part_power": PART_POWER + 1,
                "state": {
                    '96': True,
                    '227': True
                }
            })

    # At the end, the state is still accurate
    locks = [None]
    states = {
        "prev_part_power": PART_POWER,
        "part_power": PART_POWER + 1,
        "next_part_power": PART_POWER + 1,
        "state": {},
    }
    relinker.hook_pre_device(locks, states, datadir, device_path)
    self.assertEqual(states["state"], {'96': True, '227': True})
    os.close(locks[0])  # Release the lock

    # If the part_power/next_part_power tuple differs, restart from scratch
    locks = [None]
    states = {
        "part_power": PART_POWER + 1,
        "next_part_power": PART_POWER + 2,
        "state": {},
    }
    relinker.hook_pre_device(locks, states, datadir, device_path)
    self.assertEqual(states["state"], {})
    # The stale state file is expected to be gone after the mismatch
    self.assertFalse(os.path.exists(state_file))
    os.close(locks[0])  # Release the lock

    # If the file gets corrupted, restart from scratch
    with open(state_file, 'wt') as f:
        f.write('NOT JSON')

    locks = [None]
    states = {
        "part_power": PART_POWER,
        "next_part_power": PART_POWER + 1,
        "state": {}
    }
    relinker.hook_pre_device(locks, states, datadir, device_path)
    self.assertEqual(states["state"], {})
    # The unparseable state file is expected to be gone as well
    self.assertFalse(os.path.exists(state_file))
    os.close(locks[0])  # Release the lock