def collect_jobs(self, override_devices=None, override_partitions=None,
                 override_policies=None):
    """
    Returns a sorted list of jobs (dictionaries) that specify the
    partitions, nodes, etc to be rsynced.

    :param override_devices: if set, only jobs on these devices
        will be returned
    :param override_partitions: if set, only jobs on these partitions
        will be returned
    :param override_policies: if set, only jobs in these storage
        policies will be returned
    """
    jobs = []
    ips = whataremyips(self.bind_ip)
    for policy in POLICIES:
        if policy.policy_type == REPL_POLICY:
            if (override_policies is not None and
                    str(policy.idx) not in override_policies):
                continue
            # ensure rings are loaded for policy
            self.load_object_ring(policy)
            jobs += self.build_replication_jobs(
                policy, ips, override_devices=override_devices,
                override_partitions=override_partitions)
    random.shuffle(jobs)
    if self.handoffs_first:
        # Move the handoff parts to the front of the list
        jobs.sort(key=lambda job: not job['delete'])
    self.job_count = len(jobs)
    return jobs
def collect_jobs(self, override_devices=None, override_partitions=None,
                 override_policies=None):
    """
    Returns a sorted list of jobs (dictionaries) that specify the
    partitions, nodes, etc to be rsynced.

    :param override_devices: if set, only jobs on these devices
        will be returned
    :param override_partitions: if set, only jobs on these partitions
        will be returned
    :param override_policies: if set, only jobs in these storage
        policies will be returned
    """
    jobs = []
    ips = whataremyips()
    for policy in POLICIES:
        if (override_policies is not None and
                str(policy.idx) not in override_policies):
            continue
        # may need to branch here for future policy types
        jobs += self.process_repl(policy, ips,
                                  override_devices=override_devices,
                                  override_partitions=override_partitions)
    random.shuffle(jobs)
    if self.handoffs_first:
        # Move the handoff parts to the front of the list
        jobs.sort(key=lambda job: not job['delete'])
    self.job_count = len(jobs)
    return jobs
def run_once(self, *args, **kwargs):
    """Run a replication pass once."""
    self._zero_stats()
    dirs = []
    ips = whataremyips()
    if not ips:
        self.logger.error(_('ERROR Failed to get my own IPs?'))
        return
    for node in self.ring.devs:
        if node and node['ip'] in ips and node['port'] == self.port:
            if self.mount_check and not os.path.ismount(
                    os.path.join(self.root, node['device'])):
                self.logger.warn(
                    _('Skipping %(device)s as it is not mounted') % node)
                continue
            unlink_older_than(
                os.path.join(self.root, node['device'], 'tmp'),
                time.time() - self.reclaim_age)
            datadir = os.path.join(self.root, node['device'], self.datadir)
            if os.path.isdir(datadir):
                dirs.append((datadir, node['id']))
    self.logger.info(_('Beginning replication run'))
    for part, object_file, node_id in self.roundrobin_datadirs(dirs):
        self.cpool.spawn_n(
            self._replicate_object, part, object_file, node_id)
    self.cpool.waitall()
    self.logger.info(_('Replication run OVER'))
    self._report_stats()
def __init__(self, conf, handler_factory, logger=None):
    if not handler_factory:
        raise RuntimeError('Handler class must be defined')
    self.logger = logger
    self.conf = conf
    self.root = conf['devices']
    self.bulk = config_true_value(conf.get('bulk_process', False))
    self.interval = 10
    self.swift_dir = '/etc/swift'
    self.container_ring = Ring(self.swift_dir, ring_name='container')
    self.status_dir = conf['status_dir']
    self.myips = whataremyips(conf.get('swift_bind_ip', '0.0.0.0'))
    self.items_chunk = conf['items_chunk']
    # Verification slack is specified in minutes.
    self._verification_slack = conf.get('verification_slack', 0) * 60
    self.poll_interval = conf.get('poll_interval', 5)
    self.handler_factory = handler_factory
    # NOTE: this structure is not protected. Since we use green threads,
    # we expect a context switch to only occur on blocking calls, so the
    # set operations should be safe in this context. This can lead to
    # skipping container cycles unnecessarily if the threading model
    # changes.
    self._in_progress_containers = set()
    if self.bulk:
        self.workers = 1
    self._init_workers(conf)
    self._init_ic_pool(conf)
    self.log('debug', 'Created the Container Crawler instance')
def __init__(self, conf, handler_class, logger=None):
    if not handler_class:
        raise RuntimeError('Handler class must be defined')
    self.logger = logger
    self.conf = conf
    self.root = conf['devices']
    self.bulk = config_true_value(conf.get('bulk_process', False))
    self.interval = 10
    self.swift_dir = '/etc/swift'
    self.container_ring = Ring(self.swift_dir, ring_name='container')
    self.status_dir = conf['status_dir']
    self.myips = whataremyips('0.0.0.0')
    self.items_chunk = conf['items_chunk']
    self.poll_interval = conf.get('poll_interval', 5)
    self.handler_class = handler_class
    self._in_progress_containers = set()
    if self.bulk:
        self.workers = 1
    self._init_workers(conf)
    self._init_ic_pool(conf)
    self.log('debug', 'Created the Container Crawler instance')
def collect_jobs(self):
    """
    Returns a sorted list of jobs (dictionaries) that specify the
    partitions, nodes, etc to be rsynced.
    """
    jobs = []
    ips = whataremyips()
    for local_dev in [dev for dev in self.object_ring.devs
                      if dev and dev['ip'] in ips and
                      dev['port'] == self.port]:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, 'objects')
        tmp_path = join(dev_path, 'tmp')
        if self.mount_check and not os.path.ismount(dev_path):
            self.logger.warn(_('%s is not mounted'), local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            continue
        for partition in os.listdir(obj_path):
            try:
                nodes = [node for node in
                         self.object_ring.get_part_nodes(int(partition))
                         if node['id'] != local_dev['id']]
                jobs.append(dict(
                    path=join(obj_path, partition),
                    nodes=nodes,
                    delete=len(nodes) > self.object_ring.replica_count - 1,
                    partition=partition))
            except ValueError:
                continue
    random.shuffle(jobs)
    # Partitions that need to be deleted take priority
    jobs.sort(key=lambda job: not job['delete'])
    self.job_count = len(jobs)
    return jobs
def run_once(self, *args, **kwargs):
    """Run a replication pass once."""
    self._zero_stats()
    dirs = []
    ips = whataremyips()
    if not ips:
        self.logger.error(_('ERROR Failed to get my own IPs?'))
        return
    for node in self.ring.devs:
        if (node and node['replication_ip'] in ips and
                node['replication_port'] == self.port):
            if self.mount_check and not os.path.ismount(
                    os.path.join(self.root, node['device'])):
                self.logger.warn(
                    _('Skipping %(device)s as it is not mounted') % node)
                continue
            unlink_older_than(
                os.path.join(self.root, node['device'], 'tmp'),
                time.time() - self.reclaim_age)
            datadir = os.path.join(self.root, node['device'], self.datadir)
            if os.path.isdir(datadir):
                dirs.append((datadir, node['id']))
    self.logger.info(_('Beginning replication run'))
    for part, object_file, node_id in roundrobin_datadirs(dirs):
        self.cpool.spawn_n(self._replicate_object, part, object_file,
                           node_id)
    self.cpool.waitall()
    self.logger.info(_('Replication run OVER'))
    self._report_stats()
def __init__(self, conf, logger=None):
    self.conf = conf
    self.logger = logger or get_logger(conf, log_route='account-reaper')
    self.devices = conf.get('devices', '/srv/node')
    self.mount_check = config_true_value(conf.get('mount_check', 'true'))
    self.interval = int(conf.get('interval', 3600))
    self.swift_dir = conf.get('swift_dir', '/etc/swift')
    self.account_ring = None
    self.container_ring = None
    self.object_ring = None
    self.node_timeout = float(conf.get('node_timeout', 10))
    self.conn_timeout = float(conf.get('conn_timeout', 0.5))
    self.myips = whataremyips(conf.get('bind_ip', '0.0.0.0'))
    self.bind_port = int(conf.get('bind_port', 6202))
    self.concurrency = int(conf.get('concurrency', 25))
    self.container_concurrency = self.object_concurrency = \
        sqrt(self.concurrency)
    self.container_pool = GreenPool(size=self.container_concurrency)
    swift.common.db.DB_PREALLOCATION = \
        config_true_value(conf.get('db_preallocation', 'f'))
    self.delay_reaping = int(conf.get('delay_reaping') or 0)
    reap_warn_after = float(conf.get('reap_warn_after') or 86400 * 30)
    self.reap_not_done_after = reap_warn_after + self.delay_reaping
    self.start_time = time()
    self.reset_stats()
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for yielding partitions in the top level reconstructor
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips()
    for policy in POLICIES:
        if policy.policy_type != EC_POLICY:
            continue
        self._diskfile_mgr = self._df_router[policy]
        self.load_object_ring(policy)
        data_dir = get_data_dir(policy)
        local_devices = itertools.ifilter(
            lambda dev: dev and is_local_device(
                ips, self.port,
                dev['replication_ip'], dev['replication_port']),
            policy.object_ring.devs)
        for local_dev in local_devices:
            if override_devices and (local_dev['device'] not in
                                     override_devices):
                continue
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'),
                                 local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('Unable to create %s' % obj_path)
                continue
            try:
                partitions = os.listdir(obj_path)
            except OSError:
                self.logger.exception('Unable to list partitions in %r' %
                                      obj_path)
                continue
            for partition in partitions:
                part_path = join(obj_path, partition)
                if not (partition.isdigit() and
                        os.path.isdir(part_path)):
                    self.logger.warning(
                        'Unexpected entity in data dir: %r' % part_path)
                    remove_file(part_path)
                    continue
                partition = int(partition)
                if override_partitions and (partition not in
                                            override_partitions):
                    continue
                part_info = {
                    'local_dev': local_dev,
                    'policy': policy,
                    'partition': partition,
                    'part_path': part_path,
                }
                yield part_info
def __init__(self, conf, container_ring=None, object_ring=None):
    #: The dict of configuration values from the [container-sync] section
    #: of the container-server.conf.
    self.conf = conf
    #: Logger to use for container-sync log lines.
    self.logger = get_logger(conf, log_route='container-sync')
    #: Path to the local device mount points.
    self.devices = conf.get('devices', '/srv/node')
    #: Indicates whether mount points should be verified as actual mount
    #: points (normally true, false for tests and SAIO).
    self.mount_check = config_true_value(conf.get('mount_check', 'true'))
    #: Minimum time between full scans. This is to keep the daemon from
    #: running wild on near empty systems.
    self.interval = int(conf.get('interval', 300))
    #: Maximum amount of time to spend syncing a container before moving
    #: on to the next one. If a container sync hasn't finished in this
    #: time, it'll just be resumed next scan.
    self.container_time = int(conf.get('container_time', 60))
    #: ContainerSyncCluster instance for validating sync-to values.
    self.realms_conf = ContainerSyncRealms(
        os.path.join(
            conf.get('swift_dir', '/etc/swift'),
            'container-sync-realms.conf'),
        self.logger)
    #: The list of hosts we're allowed to send syncs to. This can be
    #: overridden by data in self.realms_conf
    self.allowed_sync_hosts = [
        h.strip()
        for h in conf.get('allowed_sync_hosts', '127.0.0.1').split(',')
        if h.strip()]
    self.http_proxies = [
        a.strip()
        for a in conf.get('sync_proxy', '').split(',')
        if a.strip()]
    #: Number of containers with sync turned on that were successfully
    #: synced.
    self.container_syncs = 0
    #: Number of successful DELETEs triggered.
    self.container_deletes = 0
    #: Number of successful PUTs triggered.
    self.container_puts = 0
    #: Number of containers that didn't have sync turned on.
    self.container_skips = 0
    #: Number of containers that had a failure of some type.
    self.container_failures = 0
    #: Time of last stats report.
    self.reported = time()
    swift_dir = conf.get('swift_dir', '/etc/swift')
    #: swift.common.ring.Ring for locating containers.
    self.container_ring = container_ring or Ring(swift_dir,
                                                 ring_name='container')
    #: swift.common.ring.Ring for locating objects.
    self.object_ring = object_ring or Ring(swift_dir, ring_name='object')
    self._myips = whataremyips()
    self._myport = int(conf.get('bind_port', 6001))
    swift.common.db.DB_PREALLOCATION = \
        config_true_value(conf.get('db_preallocation', 'f'))
def collect_jobs(self):
    """
    Returns a sorted list of jobs (dictionaries) that specify the
    partitions, nodes, etc to be synced.
    """
    jobs = []
    ips = whataremyips()
    for local_dev in [dev for dev in self.object_ring.devs
                      if dev and dev['replication_ip'] in ips and
                      dev['replication_port'] == self.port]:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, 'objects')
        tmp_path = join(dev_path, 'tmp')
        if self.mount_check and not ismount(dev_path):
            self.logger.warn(_('%s is not mounted'), local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
            continue
        for partition in os.listdir(obj_path):
            try:
                job_path = join(obj_path, partition)
                if isfile(job_path):
                    # Clean up any (probably zero-byte) files where a
                    # partition should be.
                    self.logger.warning(
                        'Removing partition directory '
                        'which was a file: %s', job_path)
                    os.remove(job_path)
                    continue
                part_nodes = \
                    self.object_ring.get_part_nodes(int(partition))
                nodes = [node for node in part_nodes
                         if node['id'] != local_dev['id']]
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         nodes=nodes,
                         delete=len(nodes) > len(part_nodes) - 1,
                         partition=partition))
            except (ValueError, OSError):
                continue
    random.shuffle(jobs)
    if self.handoffs_first:
        # Move the handoff parts to the front of the list
        jobs.sort(key=lambda job: not job['delete'])
    self.job_count = len(jobs)
    return jobs
def _get_my_replication_ips(self):
    my_replication_ips = set()
    ips = whataremyips()
    for policy in POLICIES:
        self.load_object_ring(policy)
        for local_dev in [dev for dev in policy.object_ring.devs
                          if dev and dev['replication_ip'] in ips and
                          dev['replication_port'] == self.port]:
            my_replication_ips.add(local_dev['replication_ip'])
    return list(my_replication_ips)
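Nearly every snippet in this collection shares one idiom: compare each ring device's (replication_)ip and port against the addresses returned by whataremyips() to decide which devices are local. A minimal self-contained sketch of that matching follows; the devs list here is hypothetical, standing in for a real ring's policy.object_ring.devs.

# Minimal sketch of the local-device matching idiom used above.
# The devs list is illustrative only; real code reads it from the ring.
def find_local_devices(devs, my_ips, my_port):
    # A dev entry may be None (a removed device), so guard before indexing.
    return [dev for dev in devs
            if dev and dev['replication_ip'] in my_ips
            and dev['replication_port'] == my_port]

devs = [
    {'id': 0, 'device': 'sdb1',
     'replication_ip': '10.0.0.1', 'replication_port': 6000},
    {'id': 1, 'device': 'sdb2',
     'replication_ip': '10.0.0.2', 'replication_port': 6000},
    None,  # holes are possible in ring dev lists
]
print(find_local_devices(devs, {'10.0.0.1'}, 6000))  # -> the sdb1 entry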
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for yielding partitions in the top level reconstructor
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips(self.bind_ip)
    for policy in POLICIES:
        if policy.policy_type != EC_POLICY:
            continue
        self._diskfile_mgr = self._df_router[policy]
        self.load_object_ring(policy)
        data_dir = get_data_dir(policy)
        local_devices = itertools.ifilter(
            lambda dev: dev and is_local_device(
                ips, self.port,
                dev["replication_ip"], dev["replication_port"]),
            policy.object_ring.devs,
        )
        for local_dev in local_devices:
            if override_devices and (local_dev["device"]
                                     not in override_devices):
                continue
            dev_path = self._df_router[policy].get_dev_path(
                local_dev["device"])
            if not dev_path:
                self.logger.warn(_("%s is not mounted"),
                                 local_dev["device"])
                continue
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception("Unable to create %s" % obj_path)
                continue
            try:
                partitions = os.listdir(obj_path)
            except OSError:
                self.logger.exception("Unable to list partitions in %r" %
                                      obj_path)
                continue
            for partition in partitions:
                part_path = join(obj_path, partition)
                if not (partition.isdigit() and os.path.isdir(part_path)):
                    self.logger.warning(
                        "Unexpected entity in data dir: %r" % part_path)
                    remove_file(part_path)
                    continue
                partition = int(partition)
                if override_partitions and (partition not in
                                            override_partitions):
                    continue
                part_info = {
                    "local_dev": local_dev,
                    "policy": policy,
                    "partition": partition,
                    "part_path": part_path,
                }
                yield part_info
def collect_jobs(self):
    """
    Returns a sorted list of jobs (dictionaries) that specify the
    partitions, nodes, etc to be synced.
    """
    jobs = []
    ips = whataremyips()
    for local_dev in [dev for dev in self.object_ring.devs
                      if dev and dev['replication_ip'] in ips and
                      dev['replication_port'] == self.port]:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, 'objects')
        tmp_path = join(dev_path, 'tmp')
        if self.mount_check and not ismount(dev_path):
            self.logger.warn(_('%s is not mounted'), local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
            continue
        for partition in os.listdir(obj_path):
            try:
                job_path = join(obj_path, partition)
                if isfile(job_path):
                    # Clean up any (probably zero-byte) files where a
                    # partition should be.
                    self.logger.warning(
                        'Removing partition directory '
                        'which was a file: %s', job_path)
                    os.remove(job_path)
                    continue
                part_nodes = \
                    self.object_ring.get_part_nodes(int(partition))
                # MODIFIED LightSync: instead of filtering out the local
                # device, rotate the node list so it starts just after
                # this device's position in the ring.
                for mypos in range(len(part_nodes)):
                    if part_nodes[mypos]['id'] == local_dev['id']:
                        break
                nodes = part_nodes[mypos + 1:] + part_nodes[:mypos]
                # END MODIFIED
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         nodes=nodes,
                         delete=len(nodes) > len(part_nodes) - 1,
                         partition=partition))
            except (ValueError, OSError):
                continue
    random.shuffle(jobs)
    if self.handoffs_first:
        # Move the handoff parts to the front of the list
        jobs.sort(key=lambda job: not job['delete'])
    self.job_count = len(jobs)
    return jobs
def run_once(self, *args, **kwargs):
    """Run a replication pass once."""
    self._zero_stats()
    dirs = []
    ips = whataremyips(self.bind_ip)
    if not ips:
        self.logger.error(_('ERROR Failed to get my own IPs?'))
        return
    if self.handoffs_only:
        self.logger.warning(
            'Starting replication pass with handoffs_only enabled. '
            'This mode is not intended for normal '
            'operation; use handoffs_only with care.')
    self._local_device_ids = set()
    found_local = False
    for node in self.ring.devs:
        if node and is_local_device(ips, self.port,
                                    node['replication_ip'],
                                    node['replication_port']):
            found_local = True
            if not check_drive(self.root, node['device'],
                               self.mount_check):
                self._add_failure_stats([
                    (failure_dev['replication_ip'],
                     failure_dev['device'])
                    for failure_dev in self.ring.devs if failure_dev])
                self.logger.warning(
                    _('Skipping %(device)s as it is not mounted') % node)
                continue
            unlink_older_than(
                os.path.join(self.root, node['device'], 'tmp'),
                time.time() - self.reclaim_age)
            datadir = os.path.join(self.root, node['device'], self.datadir)
            if os.path.isdir(datadir):
                self._local_device_ids.add(node['id'])
                filt = (self.handoffs_only_filter(node['id'])
                        if self.handoffs_only else None)
                dirs.append((datadir, node['id'], filt))
    if not found_local:
        self.logger.error("Can't find itself %s with port %s in ring "
                          "file, not replicating",
                          ", ".join(ips), self.port)
    self.logger.info(_('Beginning replication run'))
    for part, object_file, node_id in roundrobin_datadirs(dirs):
        self.cpool.spawn_n(self._replicate_object, part, object_file,
                           node_id)
    self.cpool.waitall()
    self.logger.info(_('Replication run OVER'))
    if self.handoffs_only:
        self.logger.warning(
            'Finished replication pass with handoffs_only enabled. '
            'If handoffs_only is no longer required, disable it.')
    self._report_stats()
def collect_jobs(self):
    """
    Returns a sorted list of jobs (dictionaries) that specify the
    partitions, nodes, etc to be synced.
    """
    jobs = []
    ips = whataremyips()
    for local_dev in [dev for dev in self.object_ring.devs
                      if dev and dev["replication_ip"] in ips and
                      dev["replication_port"] == self.port]:
        dev_path = join(self.devices_dir, local_dev["device"])
        obj_path = join(dev_path, "objects")
        tmp_path = join(dev_path, "tmp")
        if self.mount_check and not os.path.ismount(dev_path):
            self.logger.warn(_("%s is not mounted"), local_dev["device"])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception("ERROR creating %s" % obj_path)
            continue
        for partition in os.listdir(obj_path):
            try:
                job_path = join(obj_path, partition)
                if isfile(job_path):
                    # Clean up any (probably zero-byte) files where a
                    # partition should be.
                    self.logger.warning("Removing partition directory "
                                        "which was a file: %s", job_path)
                    os.remove(job_path)
                    continue
                part_nodes = self.object_ring.get_part_nodes(
                    int(partition))
                nodes = [node for node in part_nodes
                         if node["id"] != local_dev["id"]]
                jobs.append(
                    dict(
                        path=job_path,
                        device=local_dev["device"],
                        nodes=nodes,
                        delete=len(nodes) > len(part_nodes) - 1,
                        partition=partition,
                    )
                )
            except (ValueError, OSError):
                continue
    random.shuffle(jobs)
    if self.handoffs_first:
        # Move the handoff parts to the front of the list
        jobs.sort(key=lambda job: not job["delete"])
    self.job_count = len(jobs)
    return jobs
def __call__(self, env, start_response):
    req = Request(env)
    lxc_host = env.get("HTTP_X_OBJECT_META_LXC_HOST")
    addresses = whataremyips()
    if lxc_host in addresses:
        # path_hash = hash_path(account, container, obj)
        ring = Ring(self.object_ring_path)
        raw_path = env.get("RAW_PATH_INFO").split("/")
        path_hash = hash_path(raw_path[3], raw_path[4], raw_path[5])
        f_location = storage_directory("objects", raw_path[2], path_hash)
        path = "%s/%s/%s" % (self.root, raw_path[1], f_location)
        # Check if container exists and is running
        self.check_container(path, raw_path[5])
    return self.app(env, start_response)
def collect_jobs(self):
    """
    Returns a sorted list of jobs (dictionaries) that specify the
    partitions, nodes, etc to be rsynced.
    """
    jobs = []
    ips = whataremyips()
    for local_dev in [dev for dev in self.object_ring.devs
                      if dev and dev['ip'] in ips and
                      dev['port'] == self.port]:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, 'objects')
        tmp_path = join(dev_path, 'tmp')
        if self.mount_check and not os.path.ismount(dev_path):
            self.logger.warn(_('%s is not mounted'), local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
            continue
        for partition in os.listdir(obj_path):
            try:
                job_path = join(obj_path, partition)
                if isfile(job_path):
                    # Clean up any (probably zero-byte) files where a
                    # partition should be.
                    self.logger.warning(
                        'Removing partition directory '
                        'which was a file: %s', job_path)
                    os.remove(job_path)
                    continue
                part_nodes = \
                    self.object_ring.get_part_nodes(int(partition))
                nodes = [node for node in part_nodes
                         if node['id'] != local_dev['id']]
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         nodes=nodes,
                         delete=len(nodes) > len(part_nodes) - 1,
                         partition=partition))
            except (ValueError, OSError):
                continue
    random.shuffle(jobs)
    self.job_count = len(jobs)
    return jobs
def collect_jobs(self, override_devices=None, override_partitions=None,
                 override_policies=None):
    """
    Returns a sorted list of jobs (dictionaries) that specify the
    partitions, nodes, etc to be rsynced.

    :param override_devices: if set, only jobs on these devices
        will be returned
    :param override_partitions: if set, only jobs on these partitions
        will be returned
    :param override_policies: if set, only jobs in these storage
        policies will be returned
    """
    jobs = []
    ips = whataremyips(self.bind_ip)
    for policy in POLICIES:
        # Skip replication if next_part_power is set. In this case
        # every object is hard-linked twice, but the replicator can't
        # detect them and would create a second copy of the file if not
        # yet existing - and this might double the actual transferred
        # and stored data
        next_part_power = getattr(policy.object_ring,
                                  'next_part_power', None)
        if next_part_power is not None:
            self.logger.warning(
                _("next_part_power set in policy '%s'. Skipping"),
                policy.name)
            continue
        if policy.policy_type == REPL_POLICY:
            if (override_policies is not None and
                    policy.idx not in override_policies):
                continue
            # ensure rings are loaded for policy
            self.load_object_ring(policy)
            jobs += self.build_replication_jobs(
                policy, ips, override_devices=override_devices,
                override_partitions=override_partitions)
    random.shuffle(jobs)
    if self.handoffs_first:
        # Move the handoff parts to the front of the list
        jobs.sort(key=lambda job: not job['delete'])
    self.job_count = len(jobs)
    return jobs
def collect_jobs(self):
    """
    Returns a sorted list of jobs (dictionaries) that specify the
    partitions, nodes, etc to be rsynced.
    """
    jobs = []
    ips = whataremyips()
    for policy in POLICIES:
        # may need to branch here for future policy types
        self.process_repl(policy, jobs, ips)
    random.shuffle(jobs)
    if self.handoffs_first:
        # Move the handoff parts to the front of the list
        jobs.sort(key=lambda job: not job['delete'])
    self.job_count = len(jobs)
    return jobs
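The handoffs-first sort that recurs in these collect_jobs variants relies on Python's stable sort: jobs whose 'delete' flag is True produce a key of False and therefore sort to the front, while the shuffled order is preserved within each group. A tiny standalone illustration:

# Stable-sort illustration of the handoffs-first trick used above.
jobs = [{'partition': '1', 'delete': False},
        {'partition': '7', 'delete': True},
        {'partition': '3', 'delete': False},
        {'partition': '9', 'delete': True}]
jobs.sort(key=lambda job: not job['delete'])
print([j['partition'] for j in jobs])  # ['7', '9', '1', '3']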
def __init__(self, conf):
    self.conf = conf
    self.logger = get_logger(conf, log_route='account-reaper')
    self.devices = conf.get('devices', '/srv/node')
    self.mount_check = conf.get('mount_check', 'true').lower() in \
        ('true', 't', '1', 'on', 'yes', 'y')
    self.interval = int(conf.get('interval', 3600))
    self.swift_dir = conf.get('swift_dir', '/etc/swift')
    self.account_ring = None
    self.container_ring = None
    self.object_ring = None
    self.node_timeout = int(conf.get('node_timeout', 10))
    self.conn_timeout = float(conf.get('conn_timeout', 0.5))
    self.myips = whataremyips()
    self.concurrency = int(conf.get('concurrency', 25))
    self.container_concurrency = self.object_concurrency = \
        sqrt(self.concurrency)
    self.container_pool = GreenPool(size=self.container_concurrency)
def __init__(self, conf, container_ring=None, object_ring=None):
    #: The dict of configuration values from the [container-sync] section
    #: of the container-server.conf.
    self.conf = conf
    #: Logger to use for container-sync log lines.
    self.logger = get_logger(conf, log_route="container-sync")
    #: Path to the local device mount points.
    self.devices = conf.get("devices", "/srv/node")
    #: Indicates whether mount points should be verified as actual mount
    #: points (normally true, false for tests and SAIO).
    self.mount_check = config_true_value(conf.get("mount_check", "true"))
    #: Minimum time between full scans. This is to keep the daemon from
    #: running wild on near empty systems.
    self.interval = int(conf.get("interval", 300))
    #: Maximum amount of time to spend syncing a container before moving
    #: on to the next one. If a container sync hasn't finished in this
    #: time, it'll just be resumed next scan.
    self.container_time = int(conf.get("container_time", 60))
    #: The list of hosts we're allowed to send syncs to.
    self.allowed_sync_hosts = [
        h.strip()
        for h in conf.get("allowed_sync_hosts", "127.0.0.1").split(",")
        if h.strip()
    ]
    self.proxy = conf.get("sync_proxy")
    #: Number of containers with sync turned on that were successfully
    #: synced.
    self.container_syncs = 0
    #: Number of successful DELETEs triggered.
    self.container_deletes = 0
    #: Number of successful PUTs triggered.
    self.container_puts = 0
    #: Number of containers that didn't have sync turned on.
    self.container_skips = 0
    #: Number of containers that had a failure of some type.
    self.container_failures = 0
    #: Time of last stats report.
    self.reported = time()
    swift_dir = conf.get("swift_dir", "/etc/swift")
    #: swift.common.ring.Ring for locating containers.
    self.container_ring = container_ring or Ring(
        swift_dir, ring_name="container")
    #: swift.common.ring.Ring for locating objects.
    self.object_ring = object_ring or Ring(swift_dir, ring_name="object")
    self._myips = whataremyips()
    self._myport = int(conf.get("bind_port", 6001))
    swift.common.db.DB_PREALLOCATION = config_true_value(
        conf.get("db_preallocation", "f"))
def collect_jobs(self, override_devices=None, override_partitions=None,
                 override_policies=None):
    """
    Returns a sorted list of jobs (dictionaries) that specify the
    partitions, nodes, etc to be rsynced.

    :param override_devices: if set, only jobs on these devices
        will be returned
    :param override_partitions: if set, only jobs on these partitions
        will be returned
    :param override_policies: if set, only jobs in these storage
        policies will be returned
    """
    jobs = []
    ips = whataremyips(self.bind_ip)
    for policy in POLICIES:
        # Skip replication if next_part_power is set. In this case
        # every object is hard-linked twice, but the replicator can't
        # detect them and would create a second copy of the file if not
        # yet existing - and this might double the actual transferred
        # and stored data
        next_part_power = getattr(
            policy.object_ring, 'next_part_power', None)
        if next_part_power is not None:
            self.logger.warning(
                _("next_part_power set in policy '%s'. Skipping"),
                policy.name)
            continue
        if policy.policy_type == REPL_POLICY:
            if (override_policies is not None and
                    str(policy.idx) not in override_policies):
                continue
            # ensure rings are loaded for policy
            self.load_object_ring(policy)
            jobs += self.build_replication_jobs(
                policy, ips, override_devices=override_devices,
                override_partitions=override_partitions)
    random.shuffle(jobs)
    if self.handoffs_first:
        # Move the handoff parts to the front of the list
        jobs.sort(key=lambda job: not job['delete'])
    self.job_count = len(jobs)
    return jobs
def __init__(self, conf, handler_class, logger=None):
    self.logger = logger
    self.conf = conf
    self.root = conf['devices']
    self.bulk = conf.get('bulk_process', False)
    self.interval = 10
    self.swift_dir = '/etc/swift'
    self.container_ring = Ring(self.swift_dir, ring_name='container')
    self.status_dir = conf['status_dir']
    self.myips = whataremyips('0.0.0.0')
    self.items_chunk = conf['items_chunk']
    self.poll_interval = conf.get('poll_interval', 5)
    self.handler_class = handler_class
    if not self.bulk:
        self._init_workers(conf)
    self.log('debug', 'Created the Container Crawler instance')
def run_once(self, *args, **kwargs):
    """Run a replication pass once."""
    self._zero_stats()
    dirs = []
    ips = whataremyips(self.bind_ip)
    if not ips:
        self.logger.error(_('ERROR Failed to get my own IPs?'))
        return
    self._local_device_ids = set()
    found_local = False
    # Iterate over the ring's devices
    for node in self.ring.devs:
        if node and is_local_device(ips, self.port,
                                    node['replication_ip'],
                                    node['replication_port']):
            found_local = True
            if self.mount_check and not ismount(
                    os.path.join(self.root, node['device'])):
                self._add_failure_stats(
                    [(failure_dev['replication_ip'],
                      failure_dev['device'])
                     for failure_dev in self.ring.devs if failure_dev])
                self.logger.warning(
                    _('Skipping %(device)s as it is not mounted') % node)
                continue
            unlink_older_than(
                os.path.join(self.root, node['device'], 'tmp'),
                time.time() - self.reclaim_age)
            datadir = os.path.join(self.root, node['device'], self.datadir)
            if os.path.isdir(datadir):
                self._local_device_ids.add(node['id'])
                dirs.append((datadir, node['id']))
    if not found_local:
        self.logger.error("Can't find itself %s with port %s in ring "
                          "file, not replicating",
                          ", ".join(ips), self.port)
    self.logger.info(_('Beginning replication run'))
    for part, object_file, node_id in roundrobin_datadirs(dirs):
        self.cpool.spawn_n(
            self._replicate_object, part, object_file, node_id)
    self.cpool.waitall()
    self.logger.info(_('Replication run OVER'))
    self._report_stats()
def run_once(self, *args, **kwargs):
    """Run a replication pass once."""
    self._zero_stats()
    dirs = []
    ips = whataremyips(self.bind_ip)
    if not ips:
        self.logger.error(_('ERROR Failed to get my own IPs?'))
        return
    self._local_device_ids = set()
    found_local = False
    for node in self.ring.devs:
        if node and is_local_device(ips, self.port,
                                    node['replication_ip'],
                                    node['replication_port']):
            found_local = True
            if self.mount_check and not ismount(
                    os.path.join(self.root, node['device'])):
                self._add_failure_stats(
                    [(failure_dev['replication_ip'],
                      failure_dev['device'])
                     for failure_dev in self.ring.devs if failure_dev])
                self.logger.warning(
                    _('Skipping %(device)s as it is not mounted') % node)
                continue
            unlink_older_than(
                os.path.join(self.root, node['device'], 'tmp'),
                time.time() - self.reclaim_age)
            datadir = os.path.join(self.root, node['device'], self.datadir)
            if os.path.isdir(datadir):
                self._local_device_ids.add(node['id'])
                dirs.append((datadir, node['id']))
    if not found_local:
        self.logger.error("Can't find itself %s with port %s in ring "
                          "file, not replicating",
                          ", ".join(ips), self.port)
    self.logger.info(_('Beginning replication run'))
    for part, object_file, node_id in roundrobin_datadirs(dirs):
        self.cpool.spawn_n(
            self._replicate_object, part, object_file, node_id)
    self.cpool.waitall()
    self.logger.info(_('Replication run OVER'))
    self._report_stats()
def __call__(self, env, start_response):
    f_arg = start_response
    if self.location.lower() == 'proxy':
        req = Request(env)
        dt = datetime.now()
        ts = mktime(dt.timetuple()) + (dt.microsecond / 1000000.)
        week_day = date.today().strftime("%a")
        server_ip = whataremyips()
        txd = req.environ['swift.trans_id']
        start_time = time.time()
        # URL format is http:[host]/container/object
        version, account, container, obj = split_path(req.path, 1, 4, True)
        if container is None:
            container = ''
        if obj is None:
            obj = ''
        str_env = str(env)
        str_env = str_env.replace('"', '\'')
        user_agent = env['HTTP_USER_AGENT'] \
            if 'HTTP_USER_AGENT' in env else ''
        msg = self.log_fm % (str_env, ts, dt.year, dt.month, dt.day,
                             week_day, dt.hour, dt.minute, dt.second,
                             dt.microsecond, req.method, req.path,
                             account, container, obj, req.content_length,
                             req.params, server_ip[0], req.remote_addr,
                             user_agent, txd)

        def response_logging(status, response_headers, exc_info=None):
            elapse = time.time() - start_time
            full_msg = '%s,%s,%.8f' % (msg, status.split(' ', 1)[0],
                                       elapse)
            self.logger.info(full_msg)
            return start_response(status, response_headers, exc_info)

        f_arg = response_logging
    try:
        resp = self.app(env, f_arg)
    except Exception:
        self.client.captureException()
        raise
    return resp
def get_local_devices(self):
    """
    Returns a set of all local devices in all replication-type storage
    policies.

    This is the device names, e.g. "sdq" or "d1234" or something, not
    the full ring entries.
    """
    ips = whataremyips(self.bind_ip)
    local_devices = set()
    for policy in POLICIES:
        if policy.policy_type != REPL_POLICY:
            continue
        self.load_object_ring(policy)
        for device in policy.object_ring.devs:
            if device and is_local_device(ips, self.port,
                                          device['replication_ip'],
                                          device['replication_port']):
                local_devices.add(device['device'])
    return local_devices
def __init__(self, conf):
    self.conf = conf
    self.logger = get_logger(conf, log_route='account-reaper')
    self.devices = conf.get('devices', '/srv/node')
    self.mount_check = config_true_value(conf.get('mount_check', 'true'))
    self.interval = int(conf.get('interval', 3600))
    self.swift_dir = conf.get('swift_dir', '/etc/swift')
    self.account_ring = None
    self.container_ring = None
    self.object_ring = None
    self.node_timeout = int(conf.get('node_timeout', 10))
    self.conn_timeout = float(conf.get('conn_timeout', 0.5))
    self.myips = whataremyips()
    self.concurrency = int(conf.get('concurrency', 25))
    self.container_concurrency = self.object_concurrency = \
        sqrt(self.concurrency)
    self.container_pool = GreenPool(size=self.container_concurrency)
    swift.common.db.DB_PREALLOCATION = \
        config_true_value(conf.get('db_preallocation', 'f'))
    self.delay_reaping = int(conf.get('delay_reaping') or 0)
def get_local_devices(self):
    """
    Returns a set of all local devices in all replication-type storage
    policies.

    This is the device names, e.g. "sdq" or "d1234" or something, not
    the full ring entries.
    """
    ips = whataremyips(self.bind_ip)
    local_devices = set()
    for policy in POLICIES:
        if policy.policy_type != REPL_POLICY:
            continue
        self.load_object_ring(policy)
        for device in policy.object_ring.devs:
            if device and is_local_device(
                    ips, self.port,
                    device['replication_ip'],
                    device['replication_port']):
                local_devices.add(device['device'])
    return local_devices
def __init__(self, conf):
    self.conf = conf
    self.logger = get_logger(conf)
    self.devices = conf.get('devices', '/srv/node')
    self.mount_check = conf.get('mount_check', 'true').lower() in \
        ('true', 't', '1', 'on', 'yes', 'y')
    self.interval = int(conf.get('interval', 3600))
    swift_dir = conf.get('swift_dir', '/etc/swift')
    self.account_ring_path = os.path.join(swift_dir, 'account.ring.gz')
    self.container_ring_path = os.path.join(swift_dir,
                                            'container.ring.gz')
    self.object_ring_path = os.path.join(swift_dir, 'object.ring.gz')
    self.account_ring = None
    self.container_ring = None
    self.object_ring = None
    self.node_timeout = int(conf.get('node_timeout', 10))
    self.conn_timeout = float(conf.get('conn_timeout', 0.5))
    self.myips = whataremyips()
    self.concurrency = int(conf.get('concurrency', 25))
    self.container_concurrency = self.object_concurrency = \
        sqrt(self.concurrency)
    self.container_pool = GreenPool(size=self.container_concurrency)
def __init__(self, conf, ring, datadir, default_port, logger):
    self.logger = logger
    self.datadir = datadir
    self.conf = conf
    self.devices = conf.get('devices', '/srv/node/')
    port = int(conf.get('bind_port', default_port))
    my_ips = whataremyips()
    # device is a tuple (<device name>, <device mirror_copies>)
    self.device = None
    self.device_mirror_copies = 1
    for dev in ring.devs:
        if dev['ip'] in my_ips and dev['port'] == port:
            self.device_mirror_copies = int(dev.get('mirror_copies', 1))
            self.device = dev['device']
            break
    if not self.device:
        raise SwiftConfigurationError(
            _("Can't find device for this daemon"))
    self.faulted_devices = set()
    self.degraded_devices = set()
    self.unavailable_devices = set()
def __init__(self, conf, srvdir):
    LFS.__init__(self, conf, srvdir)
    self.fs = "zfs"
    self.topfs = conf.get('topfs')
    self.check_interval = int(conf.get('check_interval', '30'))
    mkdirs(self.root)
    devs = ring.Ring(os.path.join(conf.get('swift_dir', '/etc/swift'),
                                  'object.ring.gz')).get_devs()
    my_ips = whataremyips()
    # pools is a list of tuples => (<pool name>, <pool mirror_copies>)
    self.pools = [(dev['device'], dev['mirror_copies'])
                  for dev in devs if dev['ip'] in my_ips]
    # Create the top level ZFS.
    for pool, mr_count in self.pools:
        zfs_create(pool, self.topfs, '%s/%s' % (self.root, pool))
    self.degraded_pools = []
    self.faulted_pools = []
    self.misconfigured_pools = []
    if not self.topfs:
        sys.stderr.write(
            "Cannot locate ZFS filesystem for the Server. Exiting..\n")
        sys.exit(1)
    self.fs_per_part = False
    self.fs_per_obj = False
    if self.conf.get('fs_per_obj', 'false') == 'true':
        self.fs_per_part = True
        self.fs_per_obj = True
    elif self.conf.get('fs_per_part', 'false') == 'true':
        self.fs_per_part = True
    self.status_checker = LFSStatus(self.check_interval,
                                    self.check_pools, ())
    self.status_checker.start()
def collect_jobs(self, old_dict, new_dict, moving_map):
    """
    Returns a sorted list of jobs (dictionaries) that specify the
    partitions, nodes, etc to be rsynced.

    :param old_dict: dictionary with devices from old ring
    :param new_dict: dictionary with devices from new ring
    :param moving_map: the dictionary that contains all the partitions
        that should be moved, their sources and destinations
    """
    jobs = []
    ips = whataremyips(self.bind_ip)
    for policy in POLICIES:
        if policy.policy_type == REPL_POLICY:
            # ensure rings are loaded for policy
            self.load_object_ring(policy)
            jobs += self.build_replication_jobs(policy, ips, old_dict,
                                                new_dict, moving_map)
    random.shuffle(jobs)
    return jobs
def collect_jobs(self, old_dict, new_dict, moving_map):
    """
    Returns a sorted list of jobs (dictionaries) that specify the
    partitions, nodes, etc to be rsynced.

    :param old_dict: dictionary with devices from old ring
    :param new_dict: dictionary with devices from new ring
    :param moving_map: the dictionary that contains all the partitions
        that should be moved, their sources and destinations
    """
    jobs = []
    ips = whataremyips(self.bind_ip)
    for policy in POLICIES:
        if policy.policy_type == REPL_POLICY:
            # ensure rings are loaded for policy
            self.load_object_ring(policy)
            jobs += self.build_replication_jobs(
                policy, ips, old_dict, new_dict, moving_map)
    random.shuffle(jobs)
    return jobs
def __init__(self, swift_dir, bind_ip):
    self.swift_dir = swift_dir
    self.mtimes_by_ring_path = {}
    self.portsets_by_ring_path = {}
    self.my_ips = set(whataremyips(bind_ip))
def run_once(self, *args, **kwargs):
    """Run a replication pass once."""
    override_options = parse_override_options(once=True, **kwargs)
    devices_to_replicate = override_options.devices or Everything()
    partitions_to_replicate = override_options.partitions or Everything()
    self._zero_stats()
    dirs = []
    ips = whataremyips(self.bind_ip)
    if not ips:
        self.logger.error(_('ERROR Failed to get my own IPs?'))
        return
    if self.handoffs_only:
        self.logger.warning(
            'Starting replication pass with handoffs_only enabled. '
            'This mode is not intended for normal '
            'operation; use handoffs_only with care.')
    self._local_device_ids = set()
    found_local = False
    for node in self.ring.devs:
        if node and is_local_device(ips, self.port,
                                    node['replication_ip'],
                                    node['replication_port']):
            found_local = True
            try:
                dev_path = check_drive(self.root, node['device'],
                                       self.mount_check)
            except ValueError as err:
                self._add_failure_stats(
                    [(failure_dev['replication_ip'],
                      failure_dev['device'])
                     for failure_dev in self.ring.devs if failure_dev])
                self.logger.warning('Skipping: %s', err)
                continue
            if node['device'] not in devices_to_replicate:
                self.logger.debug(
                    'Skipping device %s due to given arguments',
                    node['device'])
                continue
            unlink_older_than(
                os.path.join(dev_path, 'tmp'),
                time.time() - self.reclaim_age)
            datadir = os.path.join(self.root, node['device'], self.datadir)
            if os.path.isdir(datadir):
                self._local_device_ids.add(node['id'])
                part_filt = self._partition_dir_filter(
                    node['id'], partitions_to_replicate)
                dirs.append((datadir, node['id'], part_filt))
    if not found_local:
        self.logger.error("Can't find itself %s with port %s in ring "
                          "file, not replicating",
                          ", ".join(ips), self.port)
    self.logger.info(_('Beginning replication run'))
    for part, object_file, node_id in self.roundrobin_datadirs(dirs):
        self.cpool.spawn_n(
            self._replicate_object, part, object_file, node_id)
    self.cpool.waitall()
    self.logger.info(_('Replication run OVER'))
    if self.handoffs_only:
        self.logger.warning(
            'Finished replication pass with handoffs_only enabled. '
            'If handoffs_only is no longer required, disable it.')
    self._report_stats()
from sys import exit
from smtplib import SMTP
from socket import gethostname

from swift.common.constraints import check_mount
from swift.common.utils import whataremyips
from swift.common.ring import Ring

try:
    ring = Ring('/etc/swift/object.ring.gz')
except IOError:
    exit()
my_ips = whataremyips()
mounted = 0
drivecount = 0
drivelabels = []
for dev in ring.devs:
    try:
        if dev['ip'] in my_ips and float(dev['weight']) > 0:
            drivecount += 1
            if check_mount('/srv/node', dev['device']):
                mounted += 1
            else:
                drivelabels.append(dev['device'])
    except TypeError:
        pass
def test_whataremyips(self):
    myips = utils.whataremyips()
    self.assert_(len(myips) > 1)
    self.assert_('127.0.0.1' in myips)
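Some snippets in this collection call whataremyips() bare while newer ones pass a bind_ip. As a hedged sketch of the convention those calls imply (the function name whataremyips_sketch and the netifaces-based enumeration are assumptions; the real implementation lives in swift.common.utils.whataremyips): a wildcard bind address means "enumerate every interface", while a concrete address can short-circuit to itself.

# Hedged sketch of whataremyips' calling convention; not Swift's
# verbatim implementation.
def whataremyips_sketch(bind_ip=None):
    import netifaces  # assumption: interface enumeration via netifaces
    if bind_ip and bind_ip not in ('0.0.0.0', '::'):
        # A concrete bind address already tells us who we are.
        return [bind_ip]
    addresses = []
    for interface in netifaces.interfaces():
        ifaddrs = netifaces.ifaddresses(interface)
        for family in (netifaces.AF_INET, netifaces.AF_INET6):
            for addr in ifaddrs.get(family, []):
                # strip any scope id like '%eth0' from link-local IPv6
                addresses.append(addr['addr'].split('%')[0])
    return addresses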
def __init__(self, conf, container_ring=None, logger=None):
    #: The dict of configuration values from the [container-sync] section
    #: of the container-server.conf.
    self.conf = conf
    #: Logger to use for container-sync log lines.
    self.logger = logger or get_logger(conf, log_route='container-sync')
    #: Path to the local device mount points.
    self.devices = conf.get('devices', '/srv/node')
    #: Indicates whether mount points should be verified as actual mount
    #: points (normally true, false for tests and SAIO).
    self.mount_check = config_true_value(conf.get('mount_check', 'true'))
    #: Minimum time between full scans. This is to keep the daemon from
    #: running wild on near empty systems.
    self.interval = int(conf.get('interval', 300))
    #: Maximum amount of time to spend syncing a container before moving
    #: on to the next one. If a container sync hasn't finished in this
    #: time, it'll just be resumed next scan.
    self.container_time = int(conf.get('container_time', 60))
    #: ContainerSyncCluster instance for validating sync-to values.
    self.realms_conf = ContainerSyncRealms(
        os.path.join(
            conf.get('swift_dir', '/etc/swift'),
            'container-sync-realms.conf'),
        self.logger)
    #: The list of hosts we're allowed to send syncs to. This can be
    #: overridden by data in self.realms_conf
    self.allowed_sync_hosts = [
        h.strip()
        for h in conf.get('allowed_sync_hosts', '127.0.0.1').split(',')
        if h.strip()]
    self.http_proxies = [
        a.strip()
        for a in conf.get('sync_proxy', '').split(',')
        if a.strip()]
    #: ContainerSyncStore instance for iterating over synced containers
    self.sync_store = ContainerSyncStore(self.devices,
                                         self.logger,
                                         self.mount_check)
    #: Number of containers with sync turned on that were successfully
    #: synced.
    self.container_syncs = 0
    #: Number of successful DELETEs triggered.
    self.container_deletes = 0
    #: Number of successful PUTs triggered.
    self.container_puts = 0
    #: Number of containers whose sync has been turned off, but
    #: are not yet cleared from the sync store.
    self.container_skips = 0
    #: Number of containers that had a failure of some type.
    self.container_failures = 0
    #: Time of last stats report.
    self.reported = time()
    self.swift_dir = conf.get('swift_dir', '/etc/swift')
    #: swift.common.ring.Ring for locating containers.
    self.container_ring = container_ring or Ring(self.swift_dir,
                                                 ring_name='container')
    bind_ip = conf.get('bind_ip', '0.0.0.0')
    self._myips = whataremyips(bind_ip)
    self._myport = int(conf.get('bind_port', 6001))
    swift.common.db.DB_PREALLOCATION = \
        config_true_value(conf.get('db_preallocation', 'f'))
    self.conn_timeout = float(conf.get('conn_timeout', 5))
    request_tries = int(conf.get('request_tries') or 3)
    internal_client_conf_path = conf.get('internal_client_conf_path')
    if not internal_client_conf_path:
        self.logger.warning(
            _('Configuration option internal_client_conf_path not '
              'defined. Using default configuration, See '
              'internal-client.conf-sample for options'))
        internal_client_conf = ConfigString(ic_conf_body)
    else:
        internal_client_conf = internal_client_conf_path
    try:
        self.swift = InternalClient(
            internal_client_conf, 'Swift Container Sync', request_tries)
    except IOError as err:
        if err.errno != errno.ENOENT:
            raise
        raise SystemExit(
            _('Unable to load internal client from config: %r (%s)') %
            (internal_client_conf_path, err))
def __init__(self, conf, container_ring=None, logger=None):
    #: The dict of configuration values from the [container-sync] section
    #: of the container-server.conf.
    self.conf = conf
    #: Logger to use for container-sync log lines.
    self.logger = logger or get_logger(conf, log_route='container-sync')
    #: Path to the local device mount points.
    self.devices = conf.get('devices', '/srv/node')
    #: Indicates whether mount points should be verified as actual mount
    #: points (normally true, false for tests and SAIO).
    self.mount_check = config_true_value(conf.get('mount_check', 'true'))
    #: Minimum time between full scans. This is to keep the daemon from
    #: running wild on near empty systems.
    self.interval = int(conf.get('interval', 300))
    #: Maximum amount of time to spend syncing a container before moving
    #: on to the next one. If a container sync hasn't finished in this
    #: time, it'll just be resumed next scan.
    self.container_time = int(conf.get('container_time', 60))
    #: ContainerSyncCluster instance for validating sync-to values.
    self.realms_conf = ContainerSyncRealms(
        os.path.join(conf.get('swift_dir', '/etc/swift'),
                     'container-sync-realms.conf'),
        self.logger)
    #: The list of hosts we're allowed to send syncs to. This can be
    #: overridden by data in self.realms_conf
    self.allowed_sync_hosts = [
        h.strip()
        for h in conf.get('allowed_sync_hosts', '127.0.0.1').split(',')
        if h.strip()
    ]
    self.http_proxies = [
        a.strip()
        for a in conf.get('sync_proxy', '').split(',')
        if a.strip()
    ]
    #: ContainerSyncStore instance for iterating over synced containers
    self.sync_store = ContainerSyncStore(self.devices,
                                         self.logger,
                                         self.mount_check)
    #: Number of containers with sync turned on that were successfully
    #: synced.
    self.container_syncs = 0
    #: Number of successful DELETEs triggered.
    self.container_deletes = 0
    #: Number of successful PUTs triggered.
    self.container_puts = 0
    #: Number of containers whose sync has been turned off, but
    #: are not yet cleared from the sync store.
    self.container_skips = 0
    #: Number of containers that had a failure of some type.
    self.container_failures = 0
    #: Per container stats. These are collected per container.
    #: puts - the number of puts that were done for the container
    #: deletes - the number of deletes that were done for the container
    #: bytes - the total number of bytes transferred per the container
    self.container_stats = collections.defaultdict(int)
    self.container_stats.clear()
    #: Time of last stats report.
    self.reported = time()
    self.swift_dir = conf.get('swift_dir', '/etc/swift')
    #: swift.common.ring.Ring for locating containers.
    self.container_ring = container_ring or Ring(self.swift_dir,
                                                 ring_name='container')
    bind_ip = conf.get('bind_ip', '0.0.0.0')
    self._myips = whataremyips(bind_ip)
    self._myport = int(conf.get('bind_port', 6201))
    swift.common.db.DB_PREALLOCATION = \
        config_true_value(conf.get('db_preallocation', 'f'))
    self.conn_timeout = float(conf.get('conn_timeout', 5))
    request_tries = int(conf.get('request_tries') or 3)
    internal_client_conf_path = conf.get('internal_client_conf_path')
    if not internal_client_conf_path:
        self.logger.warning(
            _('Configuration option internal_client_conf_path not '
              'defined. Using default configuration, See '
              'internal-client.conf-sample for options'))
        internal_client_conf = ConfigString(ic_conf_body)
    else:
        internal_client_conf = internal_client_conf_path
    try:
        self.swift = InternalClient(internal_client_conf,
                                    'Swift Container Sync', request_tries)
    except (OSError, IOError) as err:
        if err.errno != errno.ENOENT and \
                not str(err).endswith(' not found'):
            raise
        raise SystemExit(
            _('Unable to load internal client from config: '
              '%(conf)r (%(error)s)') %
            {'conf': internal_client_conf_path, 'error': err})
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for getting partitions in the top level reconstructor
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips(self.bind_ip)
    ec_policies = (policy for policy in POLICIES
                   if policy.policy_type == EC_POLICY)
    policy2devices = {}
    for policy in ec_policies:
        self.load_object_ring(policy)
        local_devices = list(
            six.moves.filter(
                lambda dev: dev and is_local_device(
                    ips, self.port,
                    dev['replication_ip'], dev['replication_port']),
                policy.object_ring.devs))
        if override_devices:
            local_devices = list(
                six.moves.filter(
                    lambda dev_info: dev_info['device'] in
                    override_devices,
                    local_devices))
        policy2devices[policy] = local_devices
        self.device_count += len(local_devices)
    all_parts = []
    for policy, local_devices in policy2devices.items():
        df_mgr = self._df_router[policy]
        for local_dev in local_devices:
            dev_path = df_mgr.get_dev_path(local_dev['device'])
            if not dev_path:
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            data_dir = get_data_dir(policy)
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            unlink_older_than(tmp_path, time.time() - df_mgr.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('Unable to create %s' % obj_path)
                continue
            try:
                partitions = os.listdir(obj_path)
            except OSError:
                self.logger.exception('Unable to list partitions in %r' %
                                      obj_path)
                continue
            self.part_count += len(partitions)
            for partition in partitions:
                part_path = join(obj_path, partition)
                if partition in ('auditor_status_ALL.json',
                                 'auditor_status_ZBF.json'):
                    continue
                if not partition.isdigit():
                    self.logger.warning(
                        'Unexpected entity in data dir: %r' % part_path)
                    self.delete_partition(part_path)
                    self.reconstruction_part_count += 1
                    continue
                partition = int(partition)
                if override_partitions and (partition not in
                                            override_partitions):
                    continue
                part_info = {
                    'local_dev': local_dev,
                    'policy': policy,
                    'partition': partition,
                    'part_path': part_path,
                }
                all_parts.append(part_info)
    random.shuffle(all_parts)
    return all_parts
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for yielding partitions in the top level reconstructor
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips(self.bind_ip)
    for policy in POLICIES:
        if policy.policy_type != EC_POLICY:
            continue
        self._diskfile_mgr = self._df_router[policy]
        self.load_object_ring(policy)
        data_dir = get_data_dir(policy)
        local_devices = list(six.moves.filter(
            lambda dev: dev and is_local_device(
                ips, self.port,
                dev['replication_ip'], dev['replication_port']),
            policy.object_ring.devs))
        if override_devices:
            self.device_count = len(override_devices)
        else:
            self.device_count = len(local_devices)
        for local_dev in local_devices:
            if override_devices and (local_dev['device'] not in
                                     override_devices):
                continue
            self.reconstruction_device_count += 1
            dev_path = self._df_router[policy].get_dev_path(
                local_dev['device'])
            if not dev_path:
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception(
                        'Unable to create %s' % obj_path)
                continue
            try:
                partitions = os.listdir(obj_path)
            except OSError:
                self.logger.exception(
                    'Unable to list partitions in %r' % obj_path)
                continue
            self.part_count += len(partitions)
            for partition in partitions:
                part_path = join(obj_path, partition)
                if not (partition.isdigit() and
                        os.path.isdir(part_path)):
                    self.logger.warning(
                        'Unexpected entity in data dir: %r' % part_path)
                    remove_file(part_path)
                    self.reconstruction_part_count += 1
                    continue
                partition = int(partition)
                if override_partitions and (partition not in
                                            override_partitions):
                    continue
                part_info = {
                    'local_dev': local_dev,
                    'policy': policy,
                    'partition': partition,
                    'part_path': part_path,
                }
                yield part_info
                self.reconstruction_part_count += 1
#!/usr/bin/python
from sys import exit
from smtplib import SMTP
from socket import gethostname

from swift.common.constraints import check_mount
from swift.common.utils import whataremyips
from swift.common.ring import Ring

try:
    ring = Ring('/etc/swift/object.ring.gz')
except IOError:
    exit()
my_ips = whataremyips()
mounted = 0
drivecount = 0
drivelabels = []
for dev in ring.devs:
    try:
        if dev['ip'] in my_ips and float(dev['weight']) > 0:
            drivecount += 1
            if check_mount('/srv/node', dev['device']):
                mounted += 1
            else:
                drivelabels.append(dev['device'])
    except TypeError:
        pass
unmounted = drivecount - mounted
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for getting partitions in the top level reconstructor

    In handoffs_only mode, primary partitions will not be included in
    the returned (possibly empty) list.
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips(self.bind_ip)
    ec_policies = (policy for policy in POLICIES
                   if policy.policy_type == EC_POLICY)
    policy2devices = {}
    for policy in ec_policies:
        self.load_object_ring(policy)
        local_devices = list(
            six.moves.filter(
                lambda dev: dev and is_local_device(
                    ips, self.port,
                    dev['replication_ip'], dev['replication_port']),
                policy.object_ring.devs))
        if override_devices:
            local_devices = list(
                six.moves.filter(
                    lambda dev_info: dev_info['device'] in
                    override_devices,
                    local_devices))
        policy2devices[policy] = local_devices
        self.device_count += len(local_devices)
    all_parts = []
    for policy, local_devices in policy2devices.items():
        # Skip replication if next_part_power is set. In this case
        # every object is hard-linked twice, but the replicator
        # can't detect them and would create a second copy of the
        # file if not yet existing - and this might double the
        # actual transferred and stored data
        next_part_power = getattr(policy.object_ring,
                                  'next_part_power', None)
        if next_part_power is not None:
            self.logger.warning(
                _("next_part_power set in policy '%s'. Skipping"),
                policy.name)
            continue
        df_mgr = self._df_router[policy]
        for local_dev in local_devices:
            dev_path = df_mgr.get_dev_path(local_dev['device'])
            if not dev_path:
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            data_dir = get_data_dir(policy)
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            unlink_older_than(tmp_path, time.time() - df_mgr.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('Unable to create %s' % obj_path)
                continue
            try:
                partitions = os.listdir(obj_path)
            except OSError:
                self.logger.exception('Unable to list partitions in %r' %
                                      obj_path)
                continue
            self.part_count += len(partitions)
            for partition in partitions:
                part_path = join(obj_path, partition)
                if partition in ('auditor_status_ALL.json',
                                 'auditor_status_ZBF.json'):
                    continue
                if not partition.isdigit():
                    self.logger.warning(
                        'Unexpected entity in data dir: %r' % part_path)
                    self.delete_partition(part_path)
                    self.reconstruction_part_count += 1
                    continue
                partition = int(partition)
                if override_partitions and (partition not in
                                            override_partitions):
                    continue
                # N.B. A primary node in handoffs_only mode may skip
                # syncing misplaced (handoff) fragments in the primary
                # partition. That may happen while rebalancing several
                # times (e.g. a node holding a handoff fragment becoming
                # a new primary). Those fragments will be synced (and
                # reverted) once handoffs_only mode is turned off.
                if self.handoffs_only and any(
                        local_dev['id'] == n['id']
                        for n in policy.object_ring.get_part_nodes(
                            partition)):
                    self.logger.debug(
                        'Skipping %s job for %s '
                        'while in handoffs_only mode.',
                        SYNC, part_path)
                    continue
                part_info = {
                    'local_dev': local_dev,
                    'policy': policy,
                    'partition': partition,
                    'part_path': part_path,
                }
                all_parts.append(part_info)
    random.shuffle(all_parts)
    return all_parts
def collect_jobs(self):
    """
    Returns a sorted list of jobs (dictionaries) that specify the
    partitions, nodes, etc to be synced.
    """
    jobs = []
    ips = whataremyips()
    for local_dev in [dev for dev in self.object_ring.devs
                      if dev and dev['replication_ip'] in ips and
                      dev['replication_port'] == self.port]:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, 'objects')
        tmp_path = join(dev_path, 'tmp')
        if self.mount_check and not ismount(dev_path):
            self.logger.warn(_('%s is not mounted'), local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
            continue
        for partition in os.listdir(obj_path):
            try:
                job_path = join(obj_path, partition)
                if isfile(job_path):
                    # Clean up any (probably zero-byte) files where a
                    # partition should be.
                    self.logger.warning(
                        'Removing partition directory '
                        'which was a file: %s', job_path)
                    os.remove(job_path)
                    continue
                part_nodes = \
                    self.object_ring.get_part_nodes(int(partition))
                #### CHANGED CODE ####
                # The disabled block below filtered replication targets
                # against a list of spun-down devices read from
                # /home/swift/spindowndevices.
                # f = open("/home/swift/spindowndevices", "r")
                # sdlist = f.read().strip().split("\n")
                # logging.info("===Spun down devices===:%s", str(sdlist))
                # f.close()
                # sddict = dict()
                # for i in sdlist:
                #     logging.info("===sddict===%s", sddict)
                #     if i.split(":")[0] in sddict:
                #         sddict[i.split(":")[0]].append(i.split(":")[1])
                #     else:
                #         sddict[i.split(":")[0]] = []
                #         sddict[i.split(":")[0]].append(i.split(":")[1])
                # nodes = []
                # for node in part_nodes:
                #     if (node['ip'] not in sddict and
                #             node['id'] != local_dev['id']):
                #         nodes.append(node)
                #     else:
                #         if (node['device'] not in sddict[node['ip']]
                #                 and node['id'] != local_dev['id']):
                #             nodes.append(node)
                nodes = [node for node in part_nodes
                         if node['id'] != local_dev['id']]
                logging.info("===Replication nodes===%s", str(nodes))
                # logging.info("===sddict===%s", str(sddict))
                #### END CHANGED CODE ####
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         nodes=nodes,
                         delete=len(nodes) > len(part_nodes) - 1,
                         partition=partition))
            except (ValueError, OSError):
                continue
    random.shuffle(jobs)
    if self.handoffs_first:
        # Move the handoff parts to the front of the list
        jobs.sort(key=lambda job: not job['delete'])
    self.job_count = len(jobs)
    return jobs
import copy
import json
import logging

import s3_sync.daemon_utils
import s3_sync.migrator
from s3_sync.stats import StatsReporterFactory
from swift.common.utils import whataremyips

from . import CONTAINER_RING

MYIPS = whataremyips('0.0.0.0')


class TempMigratorStatus(object):
    def __init__(self, config):
        new_config = copy.deepcopy(config)
        self.status_all = [{'config': new_config, 'status': {}}]

    def save_migration(self, config, marker, copied, scanned, bytes_count,
                       is_reset):
        status = self.get_migration(config)
        status['marker'] = marker
        s3_sync.migrator._update_status_counts(
            status, copied, scanned, bytes_count, is_reset)

    def get_migration(self, config):
        # Currently, we only support a single migration configuration
        for stat in self.status_all:
            if s3_sync.migrator.equal_migration(stat['config'], config):
                return stat['status']
        # doesn't exist
        new_config = copy.deepcopy(config)
        # The original snippet breaks off here; the following two lines
        # are an assumed completion that registers the new config and
        # returns its (empty) status dict.
        self.status_all.append({'config': new_config, 'status': {}})
        return self.status_all[-1]['status']
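A quick usage sketch of TempMigratorStatus as completed above. It assumes the s3_sync package is importable (save_migration delegates to s3_sync.migrator internals), and the config keys are illustrative, not s3_sync's actual schema.

# Hypothetical usage of TempMigratorStatus; config keys are illustrative.
config = {'account': 'AUTH_test', 'container': 'photos'}
status = TempMigratorStatus(config)
status.save_migration(config, marker='obj-0042', copied=10,
                      scanned=25, bytes_count=1 << 20, is_reset=False)
assert status.get_migration(config)['marker'] == 'obj-0042'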