def write(self): """Write down the service definition. """ super(LongrunService, self).write() # Mandatory settings if self._run_script is None and not os.path.exists(self._run_file): raise ValueError('Invalid LongRun service: not run script') elif self._run_script is not None: script_write(self._run_file, self._run_script) # Handle the case where the run script is a generator if not isinstance(self._run_script, str): self._run_script = None # Optional settings if self._finish_script is not None: script_write(self._finish_file, self._finish_script) # Handle the case where the finish script is a generator if not isinstance(self._finish_script, str): self._finish_script = None if self._log_run_script is not None: # Create the log dir on the spot fs.mkdir_safe(os.path.dirname(self._log_run_file)) script_write(self._log_run_file, self._log_run_script) # Handle the case where the run script is a generator if not isinstance(self._log_run_script, str): self._log_run_script = None if self._default_down: data_write(os.path.join(self._dir, 'down'), None) else: fs.rm_safe(os.path.join(self._dir, 'down')) if self._timeout_finish is not None: value_write(os.path.join(self._dir, 'timeout-finish'), self._timeout_finish) if self._notification_fd is not None: value_write(os.path.join(self._dir, 'notification-fd'), self._notification_fd)
def write(self):
    """Write down the service definition.
    """
    super(LongrunService, self).write()
    fs.mkdir_safe(self.env_dir)
    fs.mkdir_safe(self.data_dir)
    if self._env is not None:
        _utils.environ_dir_write(self.env_dir, self._env)
    if self._run_script is None and not os.path.exists(self._run_file):
        raise ValueError('Invalid LongRun service: not run script')
    elif self._run_script is not None:
        _utils.script_write(self._run_file, self._run_script)
        # Handle the case where the run script is a generator
        if not isinstance(self._run_script, six.string_types):
            self._run_script = None
    # Optional settings
    if self._finish_script is not None:
        _utils.script_write(self._finish_file, self._finish_script)
        # Handle the case where the finish script is a generator
        if not isinstance(self._finish_script, six.string_types):
            self._finish_script = None
    if self._log_run_script is not None:
        # Create the log dir on the spot
        fs.mkdir_safe(os.path.dirname(self._log_run_file))
        _utils.script_write(self._log_run_file, self._log_run_script)
        # Handle the case where the run script is a generator
        if not isinstance(self._log_run_script, six.string_types):
            self._log_run_script = None
    if self._default_down:
        _utils.data_write(os.path.join(self._dir, 'down'), None)
    else:
        fs.rm_safe(os.path.join(self._dir, 'down'))
    if self._timeout_finish is not None:
        _utils.value_write(os.path.join(self._dir, 'timeout-finish'),
                           self._timeout_finish)

def _sync_appgroup_lookups(zkclient, cell_app_groups):
    """Sync app group lookup databases."""
    groups_by_proid, checksum_by_proid = _appgroup_group_by_proid(
        cell_app_groups)

    for proid in groups_by_proid:
        if not groups_by_proid[proid]:
            _LOGGER.debug('Appgroups not defined for proid: %s', proid)
            zkutils.ensure_deleted(zkclient, z.path.appgroup_lookup(proid))
            continue

        # If node already exists with the proper checksum, ensure that others
        # are removed, but not recreate.
        digest = checksum_by_proid[proid].hexdigest()
        if zkclient.exists(z.path.appgroup_lookup(proid, digest)):
            _LOGGER.debug('Appgroup lookup for proid %s is up to date: %s',
                          proid, digest)
            continue

        db_file = _create_lookup_db(groups_by_proid[proid])
        try:
            _save_appgroup_lookup(zkclient, db_file, proid, digest)
        finally:
            fs.rm_safe(db_file)

def delete(self, princ, keytabs):
    """Delete keytab from locker.

    Only the treadmill owner proid can delete a keytab.
    """
    self._check_principal_user(princ)
    for kt_name in keytabs:
        fs.rm_safe(os.path.join(self._kt_spool_dir, kt_name))

def create(self, rrd_file, step, interval):
    """Creates rrd file for application metrics."""
    _LOGGER.info('creating %r', rrd_file)
    fs.rm_safe(rrd_file)
    self.command(' '.join([
        'CREATE',
        rrd_file,
        '-s', str(step),
        '-b', str(int(time.time())),
        'DS:memory_usage:GAUGE:%s:0:U' % interval,
        'DS:memory_softlimit:GAUGE:%s:0:U' % interval,
        'DS:memory_hardlimit:GAUGE:%s:0:U' % interval,
        'DS:cpu_total:COUNTER:%s:0:U' % interval,
        'DS:cpu_usage:GAUGE:%s:0:U' % interval,
        'DS:cpu_ratio:GAUGE:%s:0:U' % interval,
        'DS:blk_read_iops:COUNTER:%s:0:U' % interval,
        'DS:blk_write_iops:COUNTER:%s:0:U' % interval,
        'DS:blk_read_bps:COUNTER:%s:0:U' % interval,
        'DS:blk_write_bps:COUNTER:%s:0:U' % interval,
        'DS:fs_used_bytes:GAUGE:%s:0:U' % interval,
        'RRA:MIN:0.5:{}s:20m'.format(step),
        'RRA:MIN:0.5:10m:3d',
        'RRA:MAX:0.5:{}s:20m'.format(step),
        'RRA:MAX:0.5:10m:3d',
        'RRA:AVERAGE:0.5:{}s:20m'.format(step),
        'RRA:AVERAGE:0.5:10m:3d',
    ]))

def krb5keytabproxy(sock_path, krb5keytab_server, keytab_dir):
    """Run krb5keytab proxy server."""
    if not sock_path:
        sock_path = _DEFAULT_SOCK_PATH
    if not keytab_dir:
        keytab_dir = _DEFAULT_KEYTAB_DIR

    # Check the keytab dir is owned by root.
    stat = os.stat(keytab_dir)
    if stat.st_uid != 0 or stat.st_gid != 0:
        _LOGGER.warning('Keytab directory must be owned by root: %s',
                        keytab_dir)
        sys.exit(-1)

    fs.rm_safe(sock_path)

    os.environ['KRB5CCNAME'] = 'FILE:/tmp/krb5cc_host_krb5keytab_proxy'

    def _refresh_krbcc():
        """Refresh host credentials."""
        subproc.check_call(['kinit', '-k'])

    task.LoopingCall(_refresh_krbcc).start(_HOST_CREDS_REFRESH_INTERVAL)

    reactor.listenUNIX(
        sock_path,
        Krb5KeytabProxyFactory(list(krb5keytab_server), keytab_dir)
    )
    reactor.run()

def _configure(self, instance_name):
    """Configures and starts the instance based on instance cached event.

    - Runs app_configure --approot <rootdir> cache/<instance>

    :param ``str`` instance_name:
        Name of the instance to configure
    :returns ``bool``:
        True for successfully configured container.
    """
    event_file = os.path.join(
        self.tm_env.cache_dir,
        instance_name
    )

    with lc.LogContext(_LOGGER, instance_name):
        try:
            _LOGGER.info('Configuring')
            container_dir = app_cfg.configure(self.tm_env, event_file)
            app_cfg.schedule(
                container_dir,
                os.path.join(self.tm_env.running_dir, instance_name)
            )
            return True
        except Exception as err:  # pylint: disable=W0703
            _LOGGER.exception('Error configuring (%r)', event_file)
            app_abort.abort(self.tm_env, event_file, err)
            fs.rm_safe(event_file)
            return False

def _remove_cleanup_app(self, path):
    """Stop and remove a cleanup app.
    """
    name = os.path.basename(path)

    if name.startswith('.'):
        _LOGGER.warning('Ignore %s', name)
        return

    cleaning_link = os.path.join(self.tm_env.cleaning_dir, name)
    app_path = os.path.join(self.tm_env.cleanup_apps_dir, name)

    _LOGGER.info('Removing cleanup app %s -> %s', cleaning_link, app_path)

    if os.path.exists(cleaning_link):
        _LOGGER.debug('Removing cleanup link %s', cleaning_link)
        fs.rm_safe(cleaning_link)
        self._refresh_supervisor()
        _LOGGER.debug('Waiting on %s not being supervised', app_path)
        supervisor.ensure_not_supervised(app_path)
    else:
        _LOGGER.debug('Cleanup link %s does not exist', cleaning_link)

    _LOGGER.debug('Removing app directory %s', app_path)
    fs.rmtree_safe(app_path)

def invoke(self, runtime, instance, runtime_param=None):
    """Actually do the cleanup of the instance.
    """
    cleanup_link = os.path.join(self.tm_env.cleanup_dir, instance)
    container_dir = os.readlink(cleanup_link)
    _LOGGER.info('Cleanup: %s => %s', instance, container_dir)
    if os.path.exists(container_dir):
        with lc.LogContext(_LOGGER, os.path.basename(container_dir),
                           lc.ContainerAdapter) as log:
            try:
                app_runtime.get_runtime(
                    runtime, self.tm_env, container_dir, runtime_param
                ).finish()
            except Exception:  # pylint: disable=W0703
                if not os.path.exists(container_dir):
                    log.info('Container dir does not exist: %s',
                             container_dir)
                else:
                    log.exception('Fatal error running finish %r.',
                                  container_dir)
                    raise
    else:
        _LOGGER.info('Container dir does not exist: %r', container_dir)

    fs.rm_safe(cleanup_link)

def copy(self, dst, src=None):
    """Atomically copy tickets to destination."""
    if src is None:
        src = self.tkt_path
    dst_dir = os.path.dirname(dst)
    with io.open(src, 'rb') as tkt_src_file:
        # TODO: rewrite as fs.write_safe.
        with tempfile.NamedTemporaryFile(dir=dst_dir,
                                         prefix='.tmp' + self.princ,
                                         delete=False,
                                         mode='wb') as tkt_dst_file:
            try:
                # Copy binary from source to dest
                shutil.copyfileobj(tkt_src_file, tkt_dst_file)
                # Set the owner
                if self.uid is not None:
                    os.fchown(tkt_dst_file.fileno(), self.uid, -1)
                # Copy the mode
                src_stat = os.fstat(tkt_src_file.fileno())
                os.fchmod(tkt_dst_file.fileno(),
                          stat.S_IMODE(src_stat.st_mode))
                tkt_dst_file.flush()
                os.rename(tkt_dst_file.name, dst)
            except (IOError, OSError):
                _LOGGER.exception('Error copying ticket from %s to %s',
                                  src, dst)
            finally:
                fs.rm_safe(tkt_dst_file.name)

def _cleanup_archive_dir(tm_env):
    """Delete old files from archive directory if space exceeds the threshold.
    """
    archives = glob.glob(os.path.join(tm_env.archives_dir, '*'))
    infos = []
    dir_size = 0
    for archive in archives:
        try:
            archive_stat = os.stat(archive)
        except OSError as err:
            if err.errno == errno.ENOENT:
                continue
            raise
        dir_size += archive_stat.st_size
        infos.append((archive_stat.st_mtime, archive_stat.st_size, archive))

    if dir_size <= _ARCHIVE_LIMIT:
        _LOGGER.info('Archive directory below threshold: %s', dir_size)
        return

    _LOGGER.info('Archive directory above threshold: %s gt %s',
                 dir_size, _ARCHIVE_LIMIT)
    infos.sort()
    while dir_size > _ARCHIVE_LIMIT:
        ctime, size, archive = infos.pop(0)
        dir_size -= size
        _LOGGER.info('Unlink old archive %s: ctime: %s, size: %s',
                     archive, ctime, size)
        fs.rm_safe(archive)

def _on_del_trace_db(zk2fs_sync, zkpath, sow_dir):
    """Called when trace DB snapshot is deleted."""
    db_path = os.path.join(sow_dir, os.path.basename(zkpath))
    fs.rm_safe(db_path)

    fpath = zk2fs_sync.fpath(zkpath)
    fs.rm_safe(fpath)

def sigterm_handler(_signo, _stack_frame):
    """Handle sigterm.

    On sigterm, stop Zookeeper session and delete Zookeeper session id file.
    """
    _LOGGER.info('Got SIGTERM, closing zk session and rm: %s', zkid)
    fs.rm_safe(zkid)
    context.GLOBAL.zk.conn.stop()

def _remove_extra_alerts(alerts_dir, max_queue_length=0):
    """Keep the most recent max_queue_length files in alerts_dir.
    """
    # None means do not slice
    index = None if max_queue_length == 0 else 0 - max_queue_length

    for alert_file in sorted(os.listdir(alerts_dir))[:index]:
        # Do not remove files/dirs whose names start with '.'
        if alert_file[0] != '.':
            fs.rm_safe(os.path.join(alerts_dir, alert_file))

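# A minimal illustration (not part of the module) of how the slice above
# selects files for removal, assuming alert file names sort oldest-first:
files = sorted(['alert-001', 'alert-002', 'alert-003', 'alert-004'])
max_queue_length = 2
index = None if max_queue_length == 0 else 0 - max_queue_length
assert files[:index] == ['alert-001', 'alert-002']  # oldest files, removed
assert files[index:] == ['alert-003', 'alert-004']  # most recent files, kept
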
def prune_tickets(self):
    """Remove invalid tickets from directory."""
    published_tickets = self.zkclient.get_children(z.TICKETS)
    for tkt in published_tickets:
        tkt_path = os.path.join(self.tkt_spool_dir, tkt)
        if not krbcc_ok(tkt_path):
            fs.rm_safe(tkt_path)
            zkutils.ensure_deleted(
                self.zkclient,
                z.path.tickets(tkt, self.hostname)
            )

def _on_created(self, path):
    """Handler invoked when a new manifest file is seen."""
    if not self._check_path(path):
        return

    _LOGGER.info('New manifest file - %r', path)
    self._create_instance(path)
    _LOGGER.info('Created, now removing - %r', path)
    fs.rm_safe(path)

def handle_tkt_delete(tkt_file):
    """Delete ticket info.
    """
    princ = os.path.basename(tkt_file)
    if not _is_valid(princ):
        return

    infofile = os.path.join(tkt_info_dir, princ)
    _LOGGER.info('Deleting: %s', princ)
    fs.rm_safe(infofile)

def unpack(self, container_dir, root_dir, app):
    """Unpack the tar image into the container root directory."""
    _LOGGER.debug('Extracting tar file %r to %r.', self.image_path, root_dir)
    with tarfile.open(self.image_path) as tar:
        tar.extractall(path=root_dir)

    native.NativeImage(self.tm_env).unpack(container_dir, root_dir, app)

    # TODO: cache instead of removing TAR files.
    fs.rm_safe(self.image_path)

def clt_del_request(self, req_id):
    """Remove an existing request.

    This should only be called by the client instance.
    """
    svc_req_lnk = os.path.join(self._rsrc_dir, req_id)
    _LOGGER.info('Unregistering %r: %r', req_id, svc_req_lnk)
    fs.rm_safe(svc_req_lnk)

    return req_id

def rm_data(self, name):
    """Remove data from the cache.

    :param ``str`` name:
        Name for the data.
    """
    entries = self._cached.pop(name, [])
    _LOGGER.info('Removing %r', entries)
    for cache_entry in entries:
        fs.rm_safe(cache_entry.fname)

def test_rm_safe(self):
    """Test safe rm/unlink."""
    test_file = os.path.join(self.root, 'rmsafe_test')
    open(test_file, 'w+').close()

    self.assertTrue(os.path.isfile(test_file))
    fs.rm_safe(test_file)
    self.assertFalse(os.path.exists(test_file))
    fs.rm_safe(test_file)
    self.assertFalse(os.path.exists(test_file))

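# The test above calls fs.rm_safe twice on the same path, so the helper is
# expected to be idempotent. A minimal sketch of such a helper, assuming it
# simply wraps os.unlink and ignores a missing file (illustration only, not
# the actual fs.rm_safe implementation):
import errno
import os


def rm_safe_sketch(path):
    """Remove a file, ignoring the error if it does not already exist."""
    try:
        os.unlink(path)
    except OSError as err:
        if err.errno != errno.ENOENT:
            raise
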
def _on_del_task_db(zk2fs_sync, zkpath, sow_db):
    """Called when task DB snapshot is deleted."""
    fpath = zk2fs_sync.fpath(zkpath)
    fs.rm_safe(fpath)

    sow_db_copy = _copy_sow_db(sow_db)
    with sqlite3.connect(sow_db_copy) as conn:
        conn.execute("delete from sow where db = ?", (zkpath, ))
    conn.close()

    os.rename(sow_db_copy, sow_db)

def _create_status_socket(self):
    """Create a listening socket to process status requests.
    """
    fs.rm_safe(self.status_sock)
    status_socket = socket.socket(family=socket.AF_UNIX,
                                  type=socket.SOCK_STREAM,
                                  proto=0)
    status_socket.bind(self.status_sock)
    os.chmod(self.status_sock, 0o666)
    status_socket.listen(5)
    return status_socket

def _cleanup(outdir, age):
    """Cleanup old report files."""
    _LOGGER.info('Running cleanup.')
    age_sec = utils.to_seconds(age)
    now = time.time()

    for filename in os.listdir(outdir):
        fullpath = os.path.join(outdir, filename)
        created_at = os.stat(fullpath).st_ctime
        if created_at < now - age_sec:
            _LOGGER.info('Removing old file: %s', fullpath)
            fs.rm_safe(fullpath)

def update(self, rrdfile, data):
    """Updates rrd file with data, create if does not exist."""
    rrd_update_str = ':'.join(
        [str(int(time.time())), _METRICS_FMT.format(**data)]
    )
    try:
        self.command('UPDATE %s %s' % (rrdfile, rrd_update_str))
    except RRDError:
        # TODO: rather than deleting the file, better to
        #       create new one with --source <old> option, so that
        #       data is imported. (see rrdtool create docs).
        _LOGGER.exception('Error updating: %s', rrdfile)
        fs.rm_safe(rrdfile)

def _default_on_add(self, zkpath):
    """Default callback invoked when a node is added: sync its data.

    A race condition is possible in which the added node no longer exists
    by the time we try to sync its data.
    """
    try:
        self.sync_data(zkpath)
    except kazoo.client.NoNodeError:
        _LOGGER.warning('Tried to add node that no longer exists: %s',
                        zkpath)
        fpath = self.fpath(zkpath)
        fs.rm_safe(fpath)

def _data_watch(self, zkpath, data, stat, event, fpath=None):
    """Invoked when data changes.
    """
    fpath = fpath or self.fpath(zkpath)
    if event is not None and event.type == 'DELETED':
        _LOGGER.info('Node deleted: %s', zkpath)
        self.watches.discard(zkpath)
        fs.rm_safe(fpath)
    elif stat is None:
        _LOGGER.info('Node does not exist: %s', zkpath)
        self.watches.discard(zkpath)
        fs.rm_safe(fpath)
    else:
        self._write_data(fpath, data, stat)

def write(self): """Write down the service definition. """ # Disable R0912: Too many branche # pylint: disable=R0912 super(LongrunService, self).write() # Mandatory settings if self._run_script is None and not os.path.exists(self._run_file): raise ValueError('Invalid LongRun service: not run script') elif self._run_script is not None: _utils.script_write(self._run_file, self._run_script) # Handle the case where the run script is a generator if not isinstance(self._run_script, six.string_types): self._run_script = None # Optional settings if self._finish_script is not None: _utils.script_write(self._finish_file, self._finish_script) # Handle the case where the finish script is a generator if not isinstance(self._finish_script, six.string_types): self._finish_script = None if self._log_run_script is not None: # Create the log dir on the spot fs.mkdir_safe(os.path.dirname(self._log_run_file)) _utils.script_write(self._log_run_file, self._log_run_script) # Handle the case where the run script is a generator if not isinstance(self._log_run_script, six.string_types): self._log_run_script = None if self._default_down: _utils.data_write( os.path.join(self._dir, 'down'), None ) else: fs.rm_safe(os.path.join(self._dir, 'down')) if self._timeout_finish is not None: _utils.value_write( os.path.join(self._dir, 'timeout-finish'), self._timeout_finish ) if self._notification_fd is not None: _utils.value_write( os.path.join(self._dir, 'notification-fd'), self._notification_fd ) if self._pipeline_name is not None: _utils.data_write(self._pipeline_name_file, self._pipeline_name) if self._producer_for is not None: _utils.data_write(self._producer_for_file, self._producer_for) if self._consumer_for is not None: _utils.data_write(self._consumer_for_file, self._consumer_for)
def _trim_cache(self, name):
    """Cleanup the cache ensuring only the latest versions are present.

    :param ``str`` name:
        Key name for the data.
    """
    cached = self._cached[name]
    if len(cached) == 1:
        return

    self._cached[name], extra = cached[:1], cached[1:]
    _LOGGER.debug('Trimming %r', extra)
    for cache_entry in extra:
        fs.rm_safe(cache_entry.fname)

def _update_request(rsrc_dir, req_id):
    """Update an existing request.

    This should only be called by the client instance.
    """
    svc_req_lnk = os.path.join(rsrc_dir, req_id)
    _LOGGER.debug('Updating %r: %r', req_id, svc_req_lnk)

    # Remove any reply if it exists
    fs.rm_safe(os.path.join(svc_req_lnk, _base_service.REP_FILE))

    # NOTE: This does the equivalent of a touch on the symlink
    try:
        os.lchown(svc_req_lnk, os.getuid(), os.getgid())
    except OSError as err:
        if err.errno != errno.ENOENT:
            raise