def main(args=None, config=None):
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config',
        action='append', metavar='path', default=list(),
        help='Configuration files to process.'
            ' Can be specified more than once.'
            ' Values from the latter ones override values in the former.'
            ' Available CLI options override the values in any config.')
    parser.add_argument('-H', '--host',
        help='set the tcp host, supports IPv4 and IPv6 (default: %(default)s)')
    parser.add_argument('-p', '--port', type=int, help='set the tcp port')
    parser.add_argument('-v', '--version', action='store_true',
        help='display version information')
    parser.add_argument('-s', '--dont-start-skype', action='store_true',
        help="assume that skype is running independently, don't try to start/stop it")
    parser.add_argument('-m', '--mock',
        help='fake interactions with skype (only useful for tests)')
    parser.add_argument('-d', '--debug', action='store_true',
        help='enable debug messages')
    opts = parser.parse_args(sys.argv[1:] if args is None else args)

    if opts.version:
        print('skyped {}'.format(__version__))
        return

    ## Read configuration
    cfg = lya.AttrDict.from_yaml('{}.yaml'.format(
        os.path.splitext(os.path.realpath(__file__))[0]))
    for k in opts.config:
        cfg.update_yaml(k)
    if config:
        cfg.update_dict(config)

    ## Logging
    lya.configure_logging(cfg.logging,
        logging.DEBUG if opts.debug else logging.WARNING)
    log = logging.getLogger('skyped.main')

    ## Process CLI overrides
    if opts.host:
        cfg.listen.host = opts.host
    if opts.port:
        cfg.listen.port = opts.port

    ## Start the thing
    try:
        server = SkypeProxy(cfg,
            mock=opts.mock, dont_start_skype=opts.dont_start_skype)
        server.bind()
    except SkypeProxy.SetupError:
        return

    def excepthook(exc_type, exc_value, exc_tb):
        if exc_type != KeyboardInterrupt:
            try:
                log.fatal('skyped crashed ({}): {}'.format(exc_type, exc_value))
            except Exception:
                pass
            print_exception(exc_type, exc_value, exc_tb, limit=30)
            code = 1
        else:
            code = 0
        server.stop()
        sys.exit(code)
    sys.excepthook = excepthook

    log.info('skyped is started')
    server.run()
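# A minimal usage sketch (not part of the original script) of the layered lya
# config pattern used in main() above: a base YAML shipped next to the script,
# optional override files, then explicit dict overrides. File names and the
# "listen" keys here are illustrative only.
import lya

cfg = lya.AttrDict.from_yaml('defaults.yaml')   # base config, shipped with the app
cfg.update_yaml('local-overrides.yaml')         # later files override earlier values
cfg.update_dict(dict(listen=dict(port=2727)))   # explicit overrides win over any file
print(cfg.listen.port)                          # nested keys get attribute-style access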
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description='Start the IRC helper bot.')
    parser.add_argument('-e', '--relay-enable',
        action='append', metavar='relay', default=list(),
        help='Enable only the specified relays, can be specified multiple times.')
    parser.add_argument('-d', '--relay-disable',
        action='append', metavar='relay', default=list(),
        help='Explicitly disable specified relays,'
            ' can be specified multiple times. Overrides --relay-enable.')
    parser.add_argument('-c', '--config',
        action='append', metavar='path', default=list(),
        help='Configuration files to process.'
            ' Can be specified more than once.'
            ' Values from the latter ones override values in the former.'
            ' Available CLI options override the values in any config.')
    parser.add_argument('-n', '--dry-run', action='store_true',
        help='Connect to IRC, but do not communicate there,'
            ' dumping lines-to-be-sent to the log instead.')
    parser.add_argument('--fatal-errors', action='store_true',
        help='Do not try to ignore entry_point'
            ' init errors, bailing out with traceback instead.')
    parser.add_argument('--debug', action='store_true', help='Verbose operation mode.')
    parser.add_argument('--debug-memleaks', action='store_true',
        help='Import guppy and enable its manhole to debug memleaks (requires guppy module).')
    parser.add_argument('--noise', action='store_true',
        help='Even more verbose mode than --debug.')
    optz = parser.parse_args()

    ## Read configuration files
    cfg = lya.AttrDict.from_yaml('{}.yaml'.format(splitext(realpath(__file__))[0]))
    for k in optz.config:
        cfg.update_yaml(k)

    ## CLI overrides
    if optz.dry_run:
        cfg.debug.dry_run = optz.dry_run

    ## Logging
    import logging
    logging.NOISE = logging.DEBUG - 1
    logging.addLevelName(logging.NOISE, 'NOISE')
    try:
        from twisted.python.logger._stdlib import fromStdlibLogLevelMapping
    except ImportError:
        pass  # mapping exists in newer twisted versions only
    else:
        fromStdlibLogLevelMapping[logging.NOISE] = logging.NOISE
    if optz.noise:
        lvl = logging.NOISE
    elif optz.debug:
        lvl = logging.DEBUG
    else:
        lvl = logging.WARNING
    lya.configure_logging(cfg.logging, lvl)
    log.PythonLoggingObserver().start()
    for lvl in 'noise', 'debug', 'info', ('warning', 'warn'), 'error', ('critical', 'fatal'):
        lvl, func = lvl if isinstance(lvl, tuple) else (lvl, lvl)
        assert not hasattr(log, lvl)
        setattr(log, func, ft.partial(
            log.msg, logLevel=logging.getLevelName(lvl.upper())))

    # Manholes
    if optz.debug_memleaks:
        import guppy
        from guppy.heapy import Remote
        Remote.on()

    ## Fake "xattr" module, if requested
    if cfg.core.xattr_emulation:
        import shelve
        xattr_db = shelve.open(cfg.core.xattr_emulation, 'c')
        class xattr_path(object):
            def __init__(self, base):
                assert isinstance(base, str)
                self.base = base
            def key(self, k):
                return '{}\0{}'.format(self.base, k)
            def __setitem__(self, k, v):
                xattr_db[self.key(k)] = v
            def __getitem__(self, k):
                return xattr_db[self.key(k)]
            def __del__(self):
                xattr_db.sync()
        class xattr_module(object):
            xattr = xattr_path
        sys.modules['xattr'] = xattr_module

    ## Actual init
    # Merge entry points configuration with CLI opts
    conf = ep_config(cfg, [
        dict(ep='relay_defaults'),
        dict(ep='modules',
            enabled=optz.relay_enable, disabled=optz.relay_disable)])
    (conf_base, conf), (conf_def_base, conf_def) =\
        op.itemgetter('modules', 'relay_defaults')(conf)
    for subconf in conf.viewvalues():
        subconf.rebase(conf_base)
    relays, channels, routes = (
        dict((name, subconf)
            for name, subconf in conf.viewitems()
            if name[0] != '_' and subconf.get('type') == subtype)
        for subtype in ['relay', 'channel', 'route'])

    # Init interface
    interface = routing.BCInterface(
        irc_enc=cfg.core.encoding,
        chan_prefix=cfg.core.channel_prefix,
        max_line_length=cfg.core.max_line_length,
        dry_run=cfg.debug.dry_run)

    # Find out which relay entry_points are actually used
    route_mods = set(it.chain.from_iterable(
        it.chain.from_iterable(
            (mod if isinstance(mod, list) else [mod])
            for mod in ((route.get(k) or list()) for k in ['src', 'dst', 'pipe']))
        for route in routes.viewvalues()))
    for name in list(route_mods):
        try:
            name_ep = relays[name].name
            if name == name_ep:
                continue
        except KeyError:
            pass
        else:
            route_mods.add(name_ep)
            route_mods.remove(name)

    # Init relays
    relays_obj = dict()
    for ep in get_relay_list():
        if ep.name[0] == '_':
            log.debug(
                'Skipping entry_point with name'
                ' prefixed by underscore: {}'.format(ep.name))
            continue
        if ep.name not in route_mods:
            log.debug(
                'Skipping loading relay entry_point {}'
                " because it's not used in any of the routes".format(ep.name))
            continue
        ep_relays = list(
            (name, subconf) for name, subconf in relays.viewitems()
            if subconf.get('name', name) == ep.name)
        if not ep_relays:
            ep_relays = [(ep.name, conf_base.clone())]
        for name, subconf in ep_relays:
            try:
                relay_defaults = conf_def[ep.name]
            except KeyError:
                pass
            else:
                subconf.rebase(relay_defaults)
            subconf.rebase(conf_def_base)
            obj = None
            if subconf.get('enabled', True):
                log.debug('Loading relay: {} ({})'.format(name, ep.name))
                try:
                    obj = ep.load().relay(subconf, interface=interface)
                    if not obj:
                        raise AssertionError('Empty object')
                except Exception as err:
                    if optz.fatal_errors:
                        raise
                    log.error('Failed to load/init relay {}: {} {}'.format(ep.name, type(err), err))
                    obj, subconf.enabled = None, False
            if obj and subconf.get('enabled', True):
                relays_obj[name] = obj
            else:
                log.debug(
                    'Entry point object {!r} (name:'
                    ' {}) was disabled after init'.format(obj, ep.name))
    for name in set(relays).difference(relays_obj):
        log.debug(
            'Unused relay configuration - {}: no such'
            ' entry point - {}'.format(name, relays[name].get('name', name)))
    if not relays_obj:
        log.fatal('No relay objects were properly enabled/loaded, bailing out')
        sys.exit(1)
    log.debug('Enabled relays: {}'.format(relays_obj))

    # Relays-client interface
    interface.update(relays_obj, channels, routes)

    # Server
    if cfg.core.connection.server.endpoint:
        password = cfg.core.connection.get('password')
        if not password:
            from hashlib import sha1
            password = cfg.core.connection.password =\
                sha1(open('/dev/urandom', 'rb').read(120/8)).hexdigest()
        factory = irc.BCServerFactory(
            cfg.core.connection.server,
            *(chan.get('name', name) for name, chan in channels.viewitems()),
            **{cfg.core.connection.nickname: password})
        endpoints\
            .serverFromString(reactor, cfg.core.connection.server.endpoint)\
            .listen(factory)

    # Client with proper endpoints + reconnection
    # See: http://twistedmatrix.com/trac/ticket/4472 + 4700 + 4735
    ep = endpoints.clientFromString(reactor, cfg.core.connection.endpoint)
    irc.BCClientFactory(cfg.core, interface, ep).connect()

    log.debug('Starting event loop')
    reactor.run()
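# For reference, the endpoint descriptor strings parsed by clientFromString/
# serverFromString above look like the following (host/port values here are
# hypothetical, normally they come from cfg.core.connection):
from twisted.internet import reactor, endpoints

server_ep = endpoints.serverFromString(reactor, 'tcp:6667:interface=127.0.0.1')
client_ep = endpoints.clientFromString(reactor, 'tcp:irc.example.net:6667')
# .listen(factory) / .connect(factory) on these then wires them to protocol factories.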
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description='Collect and dispatch various metrics to destinations.')
    parser.add_argument('-t', '--destination', metavar='host[:port]',
        help='host[:port] (default port: 2003, can be overridden'
            ' via config file) of sink destination endpoint (e.g. carbon'
            ' linereceiver tcp port, by default).')
    parser.add_argument('-i', '--interval', type=int, metavar='seconds',
        help='Interval between collecting and sending the datapoints.')
    parser.add_argument('-e', '--collector-enable',
        action='append', metavar='collector', default=list(),
        help='Enable only the specified metric collectors,'
            ' can be specified multiple times.')
    parser.add_argument('-d', '--collector-disable',
        action='append', metavar='collector', default=list(),
        help='Explicitly disable specified metric collectors,'
            ' can be specified multiple times. Overrides --collector-enable.')
    parser.add_argument('-s', '--sink-enable',
        action='append', metavar='sink', default=list(),
        help='Enable only the specified datapoint sinks,'
            ' can be specified multiple times.')
    parser.add_argument('-x', '--sink-disable',
        action='append', metavar='sink', default=list(),
        help='Explicitly disable specified datapoint sinks,'
            ' can be specified multiple times. Overrides --sink-enable.')
    parser.add_argument('-p', '--processor-enable',
        action='append', metavar='processor', default=list(),
        help='Enable only the specified datapoint processors,'
            ' can be specified multiple times.')
    parser.add_argument('-z', '--processor-disable',
        action='append', metavar='processor', default=list(),
        help='Explicitly disable specified datapoint processors,'
            ' can be specified multiple times. Overrides --processor-enable.')
    parser.add_argument('-c', '--config',
        action='append', metavar='path', default=list(),
        help='Configuration files to process.'
            ' Can be specified more than once.'
            ' Values from the latter ones override values in the former.'
            ' Available CLI options override the values in any config.')
    parser.add_argument('-a', '--xattr-emulation', metavar='db-path',
        help='Emulate filesystem extended attributes (used in'
            ' some collectors like sysstat or cron_log), storing per-path'
            ' data in a simple shelve db.')
    parser.add_argument('-n', '--dry-run', action='store_true',
        help='Do not actually send data.')
    parser.add_argument('--debug', action='store_true',
        help='Verbose operation mode.')
    optz = parser.parse_args()

    # Read configuration files
    cfg = AttrDict.from_yaml('{}.yaml'.format(
        os.path.splitext(os.path.realpath(__file__))[0]))
    for k in optz.config:
        cfg.update_yaml(k)

    # Logging
    import logging
    configure_logging(cfg.logging,
        logging.DEBUG if optz.debug else logging.WARNING)
    if not cfg.logging.tracebacks:
        class NoTBLogger(logging.Logger):
            def exception(self, *argz, **kwz):
                self.error(*argz, **kwz)
        logging.setLoggerClass(NoTBLogger)
    log = logging.getLogger(__name__)

    # Fill "auto-detected" blanks in the configuration, CLI overrides
    try:
        if optz.destination:
            cfg.sinks._default.host = optz.destination
        cfg.sinks._default.host = cfg.sinks._default.host.rsplit(':', 1)
        if len(cfg.sinks._default.host) == 1:
            cfg.sinks._default.host =\
                cfg.sinks._default.host[0], cfg.sinks._default.default_port
        else:
            cfg.sinks._default.host[1] = int(cfg.sinks._default.host[1])
    except KeyError:
        pass
    if optz.interval:
        cfg.loop.interval = optz.interval
    if optz.dry_run:
        cfg.debug.dry_run = optz.dry_run
    if optz.xattr_emulation:
        cfg.core.xattr_emulation = optz.xattr_emulation

    # Fake "xattr" module, if requested
    if cfg.core.xattr_emulation:
        import shelve
        xattr_db = shelve.open(cfg.core.xattr_emulation, 'c')
        class xattr_path(object):
            def __init__(self, base):
                assert isinstance(base, str)
                self.base = base
            def key(self, k):
                return '{}\0{}'.format(self.base, k)
            def __setitem__(self, k, v):
                xattr_db[self.key(k)] = v
            def __getitem__(self, k):
                return xattr_db[self.key(k)]
            def __del__(self):
                xattr_db.sync()
        class xattr_module(object):
            xattr = xattr_path
        sys.modules['xattr'] = xattr_module

    # Override "enabled" collector/sink parameters, based on CLI
    ep_conf = dict()
    for ep, enabled, disabled in [
            ('collectors', optz.collector_enable, optz.collector_disable),
            ('processors', optz.processor_enable, optz.processor_disable),
            ('sinks', optz.sink_enable, optz.sink_disable) ]:
        conf = cfg[ep]
        conf_base = conf.pop('_default')
        if 'debug' not in conf_base:
            conf_base['debug'] = cfg.debug
        ep_conf[ep] = conf_base, conf, OrderedDict(), enabled, disabled

    # Init global cfg for collectors/sinks' usage
    from graphite_metrics import collectors, sinks, loops
    collectors.cfg = sinks.cfg = loops.cfg = cfg

    # Init pluggable components
    import pkg_resources

    for ep_type in 'collector', 'processor', 'sink':
        ep_key = '{}s'.format(ep_type)  # a bit of a hack
        conf_base, conf, objects, enabled, disabled = ep_conf[ep_key]
        ep_dict = dict(
            (ep.name, ep) for ep in
            pkg_resources.iter_entry_points('graphite_metrics.{}'.format(ep_key)))
        eps = OrderedDict(
            (name, (ep_dict.pop(name), subconf or AttrDict()))
            for name, subconf in conf.viewitems() if name in ep_dict)
        eps.update(
            (name, (module, conf_base))
            for name, module in ep_dict.viewitems())
        for ep_name, (ep_module, subconf) in eps.viewitems():
            if ep_name[0] == '_':
                log.debug(
                    'Skipping {} entry point,'
                    ' prefixed by underscore: {}'.format(ep_type, ep_name))
                continue
            subconf.rebase(conf_base)  # fill in "_default" parameters
            if enabled:
                subconf['enabled'] = ep_name in enabled
            if disabled and ep_name in disabled:
                subconf['enabled'] = False
            if subconf.get('enabled', True):
                log.debug('Loading {}: {}'.format(ep_type, ep_name))
                try:
                    obj = getattr(ep_module.load(), ep_type)(subconf)
                except Exception as err:
                    log.exception('Failed to load/init {} ({}): {}'.format(ep_type, ep_name, err))
                    subconf.enabled = False
                    obj = None
                if subconf.get('enabled', True):
                    objects[ep_name] = obj
                else:
                    log.debug(
                        '{} {} (entry point: {})'
                        ' was disabled after init'.format(ep_type.title(), obj, ep_name))
        if ep_type != 'processor' and not objects:
            log.fatal('No {}s were properly enabled/loaded, bailing out'.format(ep_type))
            sys.exit(1)
        log.debug('{}: {}'.format(ep_key.title(), objects))

    loop = dict(
        (ep.name, ep) for ep in
        pkg_resources.iter_entry_points('graphite_metrics.loops'))
    conf = AttrDict(**cfg.loop)
    if 'debug' not in conf:
        conf.debug = cfg.debug
    loop = loop[cfg.loop.name].load().loop(conf)

    collectors, processors, sinks = it.imap(
        op.itemgetter(2),
        op.itemgetter('collectors', 'processors', 'sinks')(ep_conf))
    log.debug(
        'Starting main loop: {} ({} collectors, {} processors, {} sinks)'
            .format(loop, len(collectors), len(processors), len(sinks)))
    loop.start(collectors, processors, sinks)
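# Hedged sketch of how the shelve-backed "xattr" fake above is consumed: once
# main() puts xattr_module into sys.modules, any later "import xattr" in
# collector code resolves to the emulation instead of the real module. The
# path and attribute names below are made up for illustration.
import xattr  # resolves to the fake module installed by main()

attrs = xattr.xattr('/var/log/sa/sa01')      # xattr_path wrapper for this path
attrs['user.sa_carbon.pos'] = '1234'         # persisted in the shelve db as 'path\0key'
assert attrs['user.sa_carbon.pos'] == '1234'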
def main(argv=None, config=None):
    import argparse
    parser = argparse.ArgumentParser(
        description='LAFS backup tool.')
    parser.add_argument('-c', '--config',
        action='append', metavar='path', default=list(),
        help='Configuration files to process.'
            ' Can be specified more than once.'
            ' Values from the latter ones override values in the former.'
            ' Available CLI options override the values in any config.')
    parser.add_argument('--debug', action='store_true', help='Verbose operation mode.')
    parser.add_argument('--noise', action='store_true',
        help='Even more verbose mode than --debug.')

    cmds = parser.add_subparsers(
        title='Supported operations (have their own suboptions as well)')

    @contextmanager
    def subcommand(name, **kwz):
        cmd = cmds.add_parser(name, **kwz)
        cmd.set_defaults(call=name)
        yield cmd

    with subcommand('backup', help='Backup data to LAFS.') as cmd:
        cmd.add_argument('--queue-only',
            nargs='?', metavar='path', default=False,
            help='Only generate upload queue file (path can'
                ' be specified as an optional argument) and stop there.')
        cmd.add_argument('--reuse-queue',
            nargs='?', metavar='path', default=False,
            help='Do not generate upload queue file, use'
                ' existing one (path can be specified as an argument) as-is.')
        cmd.add_argument('-f', '--force-queue-rebuild', action='store_true',
            help='Force upload queue file rebuild,'
                ' even if one already exists and is recent enough to be reused.')
        cmd.add_argument('--disable-deduplication', action='store_true',
            help='Make no effort to de-duplicate data (should still work on tahoe-level for files).')
        cmd.add_argument('-r', '--try-resume', action='store_true',
            help='If last-gen backup was not finished (no top-level cap recorded) - reuse'
                ' the same generation number, bumping and updating only paths with lesser number.')

    with subcommand('cleanup',
            help='Remove the backup from local caches and unlink from'
                ' LAFS destination mutable directory (if configured).'
                ' Purpose is to make local system (and accessible from it lafs path) "forget"'
                ' about specified backups, not to actually remove any backed-up data.') as cmd:
        cmd.add_argument('root_cap',
            nargs='*', metavar='LAFS-URI', default=list(),
            help='LAFS URI(s) of the backup(s) to remove.'
                ' If not specified (or "-" is used), will be read from stdin.')
        cmd.add_argument('--up-to', action='store_true',
            help='Make sure to remove all the previous known backups / generations as well.')
        cmd.add_argument('-e', '--enumerate-shares',
            default=False, nargs='?', metavar='dst_file',
            help='Do a "stream-manifest" operation with removed caps,'
                ' picking shareid of all the shares that are to be removed from the db.'
                ' Plain list of one-per-line share-ids (as returned by node)'
                ' will be written to a file specified as an argument'
                ' or stdout, if it is not specified or "-" is specified instead.'
                ' As each cleaned-up backup is crawled separately,'
                ' resulting list may contain duplicate shares (if backups share some files).'
                ' Note that just removing these may cause same-content'
                ' files from newer backups to become unavailable as well'
                ' (unless convergence secret was updated for these).')
        cmd.add_argument('-n', '--enumerate-only', action='store_true',
            help='Do not remove any info from cache/destinations,'
                ' just query and print the known-to-be-affected shares.'
                ' Only makes sense with --enumerate-shares option.')
        cmd.add_argument('-g', '--generation',
            action='append', type=int, default=list(), metavar='gen_id',
            help='Also remove specified backup generations. Affected by --up-to option.'
                ' If no URIs (or "-") are specified as arguments, the stdin stream'
                " won't be scanned for them and only the backup generations"
                ' specified with this option will be removed.')

    with subcommand('list', help='List known finished backups.') as cmd:
        cmd.add_argument('-g', '--generations',
            action='append', type=int, nargs='*', metavar='gen_id',
            help='Also list dangling entries in cache with generation numbers'
                ' not linked to any finished backup. More specific generation numbers'
                ' can be specified as arguments to only list these.')

    with subcommand('check',
            help='Check health of backup(s) and extend leases on their nodes.'
                ' If corruption is detected, only problematic paths are printed (by default).') as cmd:
        cmd.add_argument('root_cap',
            nargs='*', metavar='LAFS-URI', default=list(),
            help='LAFS URI(s) of the backup(s) to check'
                ' (tahoe "stream-deep-check" operation is used).'
                ' If "-" is specified instead, will be read from stdin.')
        cmd.add_argument('-a', '--least-recently-checked', action='store_true',
            help='Pick and check just one least recently checked URI from'
                ' the ones specified (if any), or all known finished backups otherwise.')
        cmd.add_argument('-r', '--repair', action='store_true',
            help='Perform "repair" operation as well, if necessary, reporting only repair failures.')
        cmd.add_argument('-n', '--no-lease', action='store_true',
            help='Do not extend leases on the backup nodes (extended by default).')
        cmd.add_argument('-f', '--format',
            metavar='format_spec', default='{backup[name]} {0[path]} ({0[type]})',
            help='Python format string (passed to string.format function)'
                ' to use for logging lines about corrupted nodes.'
                ' "response unit" (as described in tahoe webapi.rst section on stream-deep-check)'
                ' is passed to format function as the only parameter.'
                ' Examples: "{backup[name]} {0[path]} ({0[type]})" - default format,'
                ' "{0}" - full dump of "response unit" object (verbose),'
                ' "path: {0[path]}, repaircap: {0[repaircap]},'
                ' good_hosts: {0[check-results][results][count-good-share-hosts]}".')
        cmd.add_argument('--healthy-format', metavar='format_spec',
            help='If specified, healthy paths will be printed'
                ' as well according to it (see --format for details).')
        cmd.add_argument('-e', '--error-output', action='store_true',
            help='Send output to stderr instead of stdout,'
                ' exit with error status if any corrupted nodes were detected.'
                ' Can be useful if script runs from crontab or something similar.')

    with subcommand('dump_config', help='Dump configuration to stdout and exit.') as cmd:
        pass

    optz = parser.parse_args(argv or sys.argv[1:])

    ## Read configuration
    cfg = lya.AttrDict.from_yaml('{}.yaml'.format(
        os.path.splitext(os.path.realpath(__file__))[0]))
    for k in optz.config:
        cfg.update_yaml(k)
    if config:
        cfg.update_dict(config)

    ## Logging
    from twisted.python import log as twisted_log

    if cfg.logging.tag:
        TaggedLogger.tag = cfg.logging.tag

    noise = logging.NOISE = logging.DEBUG - 1
    logging.addLevelName(noise, 'NOISE')
    def noise(self, msg, noise=noise):
        if self.isEnabledFor(noise):
            self._log(noise, msg, ())
    logging.Logger.noise = noise

    if optz.noise:
        lvl = logging.NOISE
    elif optz.debug:
        lvl = logging.DEBUG
    else:
        lvl = logging.WARNING
    lya.configure_logging(cfg.logging, lvl)
    twisted_log.PythonLoggingObserver().start()
    log = logging.getLogger(__name__)

    ## Manhole
    manhole = manhole_ns = None
    if cfg.manhole.endpoint:
        from lafs_backup import manhole
        if not cfg.manhole.client:
            parser.error((
                'Manhole is enabled in configuration (endpoint: {}),'
                ' but no authorized client keys specified.').format(cfg.manhole.endpoint))
        if is_str(cfg.manhole.client):
            cfg.manhole.client = [cfg.manhole.client]
        fold_pubkey = lambda key, sep='':\
            sep.join(it.imap(op.methodcaller('strip'), key.splitlines()))
        cfg.manhole.client = map(fold_pubkey, cfg.manhole.client)
        cfg.manhole.server.public = fold_pubkey(cfg.manhole.server.public)
        cfg.manhole.server.private = fold_pubkey(cfg.manhole.server.private, '\n')
        manhole_ns = dict(test='success!!!')
        manhole = manhole.build_service(
            cfg.manhole.endpoint,
            authorized_keys=cfg.manhole.client,
            server_keys=(cfg.manhole.server.public, cfg.manhole.server.private),
            namespace=manhole_ns)
        if not cfg.manhole.on_signal:
            manhole.startService()
        else:
            import signal
            try:
                try:
                    signum = int(cfg.manhole.on_signal)
                except ValueError:
                    signum = cfg.manhole.on_signal.upper()
                    try:
                        signum = getattr(signal, 'SIG{}'.format(signum))
                    except AttributeError:
                        signum = getattr(signal, signum)
            except Exception as err:
                parser.error(
                    'Failed to translate value'
                    ' ({!r}) to signal: {}'.format(cfg.manhole.on_signal, err))
            def toggle_manhole(sig, frm, svc=manhole):
                if not svc.running:
                    log.info('Starting manhole service (signal {})'.format(sig))
                    reactor.callFromThread(svc.startService)
                else:
                    log.info('Stopping manhole service (signal {})'.format(sig))
                    reactor.callFromThread(svc.stopService)
            signal.signal(signum, toggle_manhole)

    ## Operation-specific CLI processing
    if optz.call == 'backup':
        if optz.disable_deduplication:
            cfg.operation.disable_deduplication = optz.disable_deduplication
        if optz.force_queue_rebuild:
            cfg.source.queue.check_mtime = False

        if optz.queue_only is not False:
            if optz.queue_only is not None:
                cfg.source.queue.path = optz.queue_only
            cfg.operation.queue_only = True
        else:
            # Check some parameters used in the upload phase
            if cfg.destination.encoding.xz.enabled and not lzma:
                raise ImportError('Unable to import lzma module')
            for metric, spec in cfg.operation.rate_limit.viewitems():
                if not spec:
                    continue
                spec = token_bucket(metric, spec)
                next(spec)
                cfg.operation.rate_limit[metric] = spec
        cfg.operation.try_resume = optz.try_resume

        if optz.reuse_queue is not False:
            if optz.force_queue_rebuild:
                parser.error('Options --force-queue-rebuild'
                    ' and --reuse-queue cannot be used together.')
            if optz.reuse_queue is not None:
                cfg.source.queue.path = optz.reuse_queue
            cfg.operation.reuse_queue = True

        lafs_op = LAFSBackup(cfg)

    elif optz.call == 'cleanup':
        caps = set(optz.root_cap).difference({'-'})
        if (not optz.generation and not optz.root_cap) or '-' in optz.root_cap:
            caps.update(it.ifilter(None, (line.strip() for line in sys.stdin)))
        if optz.enumerate_only and optz.enumerate_shares is False:
            parser.error('Option --enumerate-only can only be used with --enumerate-shares.')
        lafs_op = LAFSCleanup(
            cfg, caps, optz.generation, optz.up_to,
            optz.enumerate_shares, optz.enumerate_only)

    elif optz.call == 'list':
        if optz.generations is not None:
            optz.generations = set(it.chain.from_iterable(optz.generations))
        lafs_op = LAFSList(cfg, list_dangling_gens=optz.generations)

    elif optz.call == 'check':
        caps = set(optz.root_cap).difference({'-'})
        if '-' in optz.root_cap:
            caps.update(it.ifilter(None, (line.strip() for line in sys.stdin)))
        lafs_op = LAFSCheck(
                cfg, caps, fmt_ok=optz.healthy_format, fmt_err=optz.format,
                pick=optz.least_recently_checked,
                lease=not optz.no_lease, repair=optz.repair,
                err_out=optz.error_output )\
            if caps or optz.least_recently_checked else None

    elif optz.call == 'dump_config':
        lafs_op = ft.partial(cfg.dump, sys.stdout)

    else:
        parser.error('Unrecognized command: {}'.format(optz.call))

    ## Actual work
    if lafs_op:
        if manhole_ns:  # populate manhole namespace with relevant objects
            manhole_ns.update(
                config=cfg, optz=optz, optz_parser=parser,
                lafs_op=lafs_op, log=log, inspect=inspect, traceback=traceback)

        def _stop(res):
            lafs_op.debug_frame = None
            if isinstance(res, (Exception, Failure)):
                global exit_code
                exit_code = 1
                if isinstance(res, CleanBreak) or res.check(CleanBreak):
                    log.info(res.value.message)
                    res = None
                else:
                    log_web_failure(log, res)
            if reactor.running:
                reactor.stop()
            return res  # will still be raised/logged by twisted, if not defused

        reactor.callWhenRunning(
            lambda: defer.maybeDeferred(
                lafs_op.run if not callable(lafs_op) else lafs_op).addBoth(_stop))

        log.debug('Starting...')
        reactor.run()
        log.debug('Finished')

    return exit_code
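# The token_bucket() generator used in the rate_limit setup above is defined
# elsewhere in the project; below is a minimal sketch of a compatible
# coroutine (an assumption about its interface, not the project's actual
# implementation): prime it with next(), then each subsequent next() yields
# either None (operation allowed now) or seconds to sleep before retrying.
import time

def token_bucket(metric, rate, burst=20.0):
    # rate: tokens added per second (assumed spec format); burst: bucket capacity
    tokens, updated = float(burst), time.time()
    while True:
        now = time.time()
        tokens = min(burst, tokens + (now - updated) * float(rate))
        updated = now
        if tokens >= 1:
            tokens -= 1
            yield None  # a token was available, no delay needed
        else:
            yield (1 - tokens) / float(rate)  # suggested delay until a token accumulates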
def main(argv=None):
    import argparse
    parser = argparse.ArgumentParser(
        description='Check integrity of at-rest files/data.')
    parser.add_argument('-c', '--config',
        action='append', metavar='path', default=list(),
        help='Configuration files to process.'
            ' Can be specified more than once.'
            ' Values from the latter ones override values in the former.'
            ' Available CLI options override the values in any config.')
    parser.add_argument('--debug', action='store_true', help='Verbose operation mode.')

    cmds = parser.add_subparsers(
        title='Supported operations (have their own suboptions as well)')

    @contextmanager
    def subcommand(name, **kwz):
        cmd = cmds.add_parser(name, **kwz)
        cmd.set_defaults(call=name)
        yield cmd

    with subcommand('scrub',
            help='Scrub configured paths, detecting bitrot,'
                ' updating checksums on legitimate changes and adding new files.') as cmd:
        cmd.add_argument('-s', '--scan-only', action='store_true',
            help='Do not process file contents (or open'
                ' them) in any way, just scan for new/modified files.')
        cmd.add_argument('-r', '--resume', action='store_true',
            help="Don't scan any paths, but rather resume scrubbing from the point"
                ' where the last run was interrupted. Mutually exclusive with --scan-only.')
        cmd.add_argument('-p', '--extra-paths', nargs='+', metavar='path',
            help='Extra paths to append to the one(s) configured via "storage.path".'
                ' Can be used to set the list of paths dynamically (e.g., via wildcard from shell).')

    with subcommand('status',
            help='List files with status recorded in the database.') as cmd:
        cmd.add_argument('-v', '--verbose', action='store_true',
            help='Display last check and modification info along with the path.')
        cmd.add_argument('-d', '--dirty', action='store_true',
            help='Only list files which are known to be modified since last checksum update.')
        cmd.add_argument('-c', '--checked', action='store_true',
            help='Only list files which were checked on the last run.')
        cmd.add_argument('-u', '--not-checked', action='store_true',
            help='Only list files which were left unchecked on the last run.')
        # cmd.add_argument('-n', '--new', action='store_true',
        #     help='Files that are not yet recorded at all, but exist on disk. Implies fs scan.')

    optz = parser.parse_args(sys.argv[1:] if argv is None else argv)

    ## Read configuration files
    import lya
    cfg = lya.AttrDict.from_yaml('{}.yaml'.format(
        os.path.splitext(os.path.realpath(__file__))[0]))
    for k in optz.config:
        cfg.update_yaml(k)

    lya.configure_logging(cfg.logging,
        logging.WARNING if not optz.debug else logging.DEBUG)
    log = logging.getLogger('bitrot_scrubber.root')

    ## Options processing
    if not cfg.storage.metadata.db:
        parser.error('Path to metadata db ("storage.metadata.db") must be configured.')
    try:
        cfg.operation.checksum = getattr(xxhash, cfg.operation.checksum)
    except AttributeError:
        cfg.operation.checksum = hashlib.new(cfg.operation.checksum)

    if is_str(cfg.storage.path):
        cfg.storage.path = [cfg.storage.path]
    else:
        cfg.storage.path = list(cfg.storage.path or list())

    _filter_actions = {'+': True, '-': False}
    cfg.storage.filter = list(
        (_filter_actions[pat[0]], re.compile(pat[1:]))
        for pat in (cfg.storage.filter or list()))

    for metric, spec in cfg.operation.rate_limit.viewitems():
        if not spec:
            continue
        spec = token_bucket(metric, spec)
        next(spec)
        cfg.operation.rate_limit[metric] = spec

    if cfg.storage.metadata.db_parity is None:
        cfg.storage.metadata.db_parity = cfg.storage.metadata.db + '.check'
    skip_for = cfg.operation.skip_for_hours * 3600
    cfg.operation.read_block = int(cfg.operation.read_block)

    ## Actual work
    log.debug('Starting (operation: {})'.format(optz.call))

    with db.MetaDB(
            cfg.storage.metadata.db, cfg.storage.metadata.db_parity,
            cfg.operation.checksum, log_queries=cfg.logging.sql_queries,
            use_fadvise=cfg.operation.use_fadvise,
            commit_after=op.itemgetter('queries', 'seconds')\
                (cfg.storage.metadata.db_commit_after) ) as meta_db:
        if optz.call == 'scrub':
            if optz.scan_only and optz.resume:
                parser.error('Either --scan-only or --resume can be specified, not both.')
            if optz.extra_paths:
                cfg.storage.path.extend(optz.extra_paths)
            if not cfg.storage.path:
                parser.error(
                    'At least one path to scrub must'
                    ' be specified (via "storage.path" in config or on commandline).')
            scrub(
                cfg.storage.path, meta_db,
                scan_only=optz.scan_only, resume=optz.resume,
                xdev=cfg.storage.xdev, path_filter=cfg.storage.filter,
                skip_for=skip_for, bs=cfg.operation.read_block,
                rate_limits=cfg.operation.rate_limit)

        elif optz.call == 'status':
            first_row = True
            for info in meta_db.list_paths():
                if optz.dirty and not info['dirty']:
                    continue
                if optz.not_checked and info['clean']:
                    continue
                if optz.checked and not info['clean']:
                    continue
                if not optz.verbose:
                    print(info['path'])
                else:
                    if not first_row:
                        print()
                    else:
                        first_row = False
                    print('path: {}'.format(info['path']))
                    print(
                        ' checked: {0[last_scrub]} (last run: {0[clean]})\n'
                        ' dirty: {0[dirty]}{1}'.format(info,
                            ', skipped: {}'.format(info['last_skip'])
                                if info['last_skip'] else ''))

        else:
            raise ValueError('Unknown command: {}'.format(optz.call))

    log.debug('Done')
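# Illustration (made-up pattern values) of the storage.filter compilation
# above: each configured pattern starts with "+" (include) or "-" (exclude),
# and the rest is a regular expression matched against scanned paths.
import re

_filter_actions = {'+': True, '-': False}
patterns = ['-\\.git/', '+/home/', '-.*']  # hypothetical config values
compiled = [(_filter_actions[pat[0]], re.compile(pat[1:])) for pat in patterns]
# scrub() then tests each path against these (action, regex) pairs,
# presumably applying the action of the first pattern that matches.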