def test_app_add_command_options(self):
  option_name = 'test_option_name'
  option = options.TwitterOption('--test', dest=option_name)

  @app.command_option(option)
  def test_command():
    pass

  assert not hasattr(app.get_options(), option_name)
  app.add_command_options(test_command)
  assert hasattr(app.get_options(), option_name)
def __init__(self, servers=None, timeout_secs=None, watch=None, max_reconnects=MAX_RECONNECTS,
             logger=log.debug):
  """Create new ZooKeeper object.

  Blocks until ZK negotiation completes, or the timeout expires. By default only tries to
  connect once. Use a larger 'max_reconnects' if you want to be resilient to things such as
  DNS outages/changes.

  If watch is set to a function, it is called whenever the global zookeeper watch is
  dispatched using the same function signature, with the exception that this object is used in
  place of the zookeeper handle.
  """
  default_ensemble = self.DEFAULT_ENSEMBLE
  default_timeout = self.DEFAULT_TIMEOUT_SECONDS
  if WITH_APP:
    options = app.get_options()
    default_ensemble = options.zookeeper
    default_timeout = options.zookeeper_timeout
  self._servers = servers or default_ensemble
  self._timeout_secs = timeout_secs or default_timeout
  self._init_count = 0
  self._live = threading.Event()
  self._stopped = threading.Event()
  self._completions = Queue()
  self._zh = None
  self._watch = watch
  self._logger = logger
  self._max_reconnects = max_reconnects
  self.reconnect()
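# Minimal usage sketch for the constructor above. It assumes the enclosing class is importable
# as `ZooKeeper` and that `log` is the twitter.common logger; both names are assumptions made
# only for illustration.
def _on_zk_event(zk, event_type, state, path):
  # Per the docstring, the watch receives the ZooKeeper object in place of the raw zk handle.
  log.info('ZK event: type=%s state=%s path=%s' % (event_type, state, path))

zk = ZooKeeper(servers='zk1.example.com:2181,zk2.example.com:2181',
               timeout_secs=15,
               watch=_on_zk_event,
               max_reconnects=5)  # allow several reconnect attempts, e.g. across DNS changes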
def create_tunnel(cls, remote_host, remote_port, tunnel_host=None, tunnel_port=None):
  """Create a tunnel from a local port to the remote host & port, using tunnel_host as the
  tunneling server.
  """
  tunnel_key = (remote_host, remote_port)
  if tunnel_key in cls.TUNNELS:
    return 'localhost', cls.TUNNELS[tunnel_key][0]
  if HAS_APP:
    tunnel_host = tunnel_host or app.get_options().tunnel_host
  assert tunnel_host is not None, 'Must specify tunnel host!'
  tunnel_port = tunnel_port or cls.get_random_port()
  ssh_cmd_args = ('ssh', '-T', '-L', '%d:%s:%s' % (tunnel_port, remote_host, remote_port),
                  tunnel_host)
  cls.TUNNELS[tunnel_key] = (tunnel_port, subprocess.Popen(ssh_cmd_args, stdin=subprocess.PIPE))
  if not cls.wait_for_accept(tunnel_port):
    raise cls.TunnelError('Could not establish tunnel via %s' % remote_host)
  return 'localhost', tunnel_port
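# Hypothetical call site for the classmethod above. The enclosing class name (`TunnelHelper`)
# and the host names are not shown in this snippet and are assumed purely for illustration.
host, port = TunnelHelper.create_tunnel('db.prod.example.com', 3306,
                                        tunnel_host='bastion.example.com')
# Traffic sent to ('localhost', port) is now forwarded over ssh to db.prod.example.com:3306.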
def perform_maintenance_hosts(cluster):
  """usage: perform_maintenance cluster [--filename=filename]
                                        [--hosts=hosts]
                                        [--batch_size=num]
                                        [--post_drain_script=path]
                                        [--grouping=function]

  Asks the scheduler to remove any running tasks from the machine and remove it
  from service temporarily, perform some action on them, then return the machines
  to service.
  """
  options = app.get_options()
  drainable_hosts = parse_hosts(options)

  if options.post_drain_script:
    if not os.path.exists(options.post_drain_script):
      die("No such file: %s" % options.post_drain_script)
    cmd = os.path.abspath(options.post_drain_script)
    drained_callback = lambda host: subprocess.Popen([cmd, host])
  else:
    drained_callback = None

  MesosMaintenance(CLUSTERS[cluster], options.verbosity).perform_maintenance(
      drainable_hosts,
      batch_size=int(options.batch_size),
      callback=drained_callback,
      grouping_function=options.grouping)
def end_maintenance_hosts(cluster):
  """usage: end_maintenance_hosts cluster [--filename=filename]
                                          [--hosts=hosts]
  """
  options = app.get_options()
  MesosMaintenance(CLUSTERS[cluster], options.verbosity).end_maintenance(parse_hosts(options))
def simple():
  """Whether or not simple logging should be used."""
  if LogOptions._SIMPLE is None:
    LogOptions._SIMPLE = app.get_options().twitter_common_log_simple
  return LogOptions._SIMPLE
def sla_probe_hosts(cluster, percentage, duration):
  """usage: sla_probe_hosts
            [--filename=FILENAME]
            [--grouping=GROUPING]
            [--hosts=HOSTS]
            [--min_job_instance_count=COUNT]
            cluster percentage duration

  Probes individual hosts with respect to their job SLA.
  Specifically, given a host, outputs all affected jobs with their projected SLAs
  if the host goes down. In addition, if a job's projected SLA does not clear
  the specified limits, suggests the approximate time when that job reaches its SLA.

  Output format:
  HOST  JOB  PREDICTED_SLA  SAFE?  PREDICTED_SAFE_IN

  where:
  HOST - host being probed.
  JOB - job that has tasks running on the host being probed.
  PREDICTED_SLA - predicted effective percentage of up tasks if the host is shut down.
  SAFE? - PREDICTED_SLA >= percentage
  PREDICTED_SAFE_IN - expected wait time in seconds for the job to reach the requested SLA threshold.
  """
  options = app.get_options()

  sla_percentage = parse_sla_percentage(percentage)
  sla_duration = parse_time(duration)
  hosts = parse_hostnames(options.filename, options.hosts)
  get_grouping_or_die(options.grouping)

  vector = make_admin_client(cluster).sla_get_safe_domain_vector(options.min_instance_count, hosts)
  groups = vector.probe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS), options.grouping)

  output, _ = format_sla_results(groups)
  print_results(output)
def sla_host_drain(cluster):
  """usage: sla_host_drain {--filename=filename | --hosts=hosts}
            [--default_percentage=percentage]
            [--default_duration=duration]
            [--force_drain_timeout=timeout]
            cluster

  Asks the scheduler to drain the list of provided hosts in an SLA-aware manner.

  The list of hosts is drained and marked in a drained state. This will kill off any tasks
  currently running on these hosts, as well as prevent future tasks from scheduling on these
  hosts while they are drained. The hosts are left in maintenance mode upon completion.

  Use host_activate to return hosts back to service and allow scheduling tasks on them.

  If tasks are unable to be drained after the specified timeout interval they will be
  forcefully drained even if it breaks SLA.
  """
  options = app.get_options()
  drainable_hosts = parse_hostnames(options.filename, options.hosts)
  percentage, duration, timeout = parse_and_validate_sla_drain_default(options)

  HostMaintenance(
      cluster=CLUSTERS[cluster],
      verbosity=options.verbosity,
      bypass_leader_redirect=options.bypass_leader_redirect
  ).perform_sla_maintenance(drainable_hosts, percentage=percentage, duration=duration,
                            timeout=timeout)
def main(args):
  options = app.get_options()

  if options.show_help:
    app.help()

  if options.show_version or options.just_version:
    print >> sys.stdout, 'Python NailGun client version 0.0.1'
    if options.just_version:
      sys.exit(0)

  # Assume ng.pex has been aliased to the command name
  command = re.compile('.pex$').sub('', os.path.basename(sys.argv[0]))
  args_index = 0

  # Otherwise the command name is the 1st arg
  if command == 'ng':
    if not args:
      app.help()
    command = args[0]
    args_index = 1

  ng = NailgunClient(host=options.ng_host, port=options.ng_port)
  result = ng(command, *args[args_index:], **os.environ)
  sys.exit(result)
def restore(j, target):
  """Restore jobs from a config directory"""
  config_dir = app.get_options().config_dir

  if config_dir is None:
    log.error("no config_dir defined.")
    sys.exit()

  if not os.path.exists(os.path.realpath(config_dir)):
    log.error("config path does not exist")
    sys.exit()

  for job in os.listdir(config_dir):
    # here we need to:
    #   check for config.xml
    #   check for job on target server
    #   if job exists, update it
    #   if not, create it
    config_file = "%s/%s/config.xml" % (config_dir, job)
    if not os.path.exists(config_file):
      log.error("config file does not exist: %s" % config_file)
      sys.exit()

    job_xml = read_config(config_file)
    try:
      jobj = j.get_job(job)
      if not jobj.get_config() == job_xml:
        log.info("Updating %s" % job)
        jobj.update_config(job_xml)
    except UnknownJob as e:
      log.error("job doesn't exist. creating")
      j.create_job(job, job_xml)
def main(args):
  options = app.get_options()

  if options.show_help:
    app.help()

  if options.show_version or options.just_version:
    print('Python NailGun client version %s' % VERSION)
    if options.just_version:
      sys.exit(0)

  # Assume ng.pex has been aliased to the command name
  command = re.compile('.pex$').sub('', os.path.basename(sys.argv[0]))
  args_index = 0

  # Otherwise the command name is the 1st arg
  if command == 'ng':
    if not args:
      app.help()
    command = args[0]
    args_index = 1

  ng = NailgunClient(host=options.ng_host, port=options.ng_port)
  try:
    result = ng(command, *args[args_index:], **os.environ)
    sys.exit(result)
  except ng.NailgunError as e:
    print('Problem executing command: %s' % e, file=sys.stderr)
    sys.exit(1)
def list_jobs(cluster_and_role):
  """usage: list_jobs [--show-cron] cluster/role/env/job

  Shows all jobs that match the job-spec known by the scheduler.
  If --show-cron is specified, then also shows the registered cron schedule.
  """
  def show_job_simple(job):
    if options.show_cron_schedule:
      print(('{0}/{1.key.role}/{1.key.environment}/{1.key.name}' +
             '\t\'{1.cronSchedule}\'\t{1.cronCollisionPolicy}').format(cluster, job))
    else:
      print('{0}/{1.key.role}/{1.key.environment}/{1.key.name}'.format(cluster, job))

  def show_job_pretty(job):
    print("Job %s/%s/%s/%s:" %
          (cluster, job.key.role, job.key.environment, job.key.name))
    print('\tcron schedule: %s' % job.cronSchedule)
    print('\tcron policy: %s' % job.cronCollisionPolicy)

  options = app.get_options()
  if options.show_cron_schedule and options.pretty:
    print_fn = show_job_pretty
  else:
    print_fn = show_job_simple

  # Take the cluster_and_role parameter, and split it into its two components.
  if cluster_and_role.count('/') != 1:
    die('list_jobs parameter must be in cluster/role format')
  cluster, role = cluster_and_role.split('/')

  api = make_client(cluster)
  resp = api.get_jobs(role)
  check_and_log_response(resp)
  for job in resp.result.getJobsResult.configs:
    print_fn(job)
def make_admin_client_with_options(cluster):
  options = app.get_options()

  return make_admin_client(
      cluster=cluster,
      verbose=getattr(options, 'verbosity', 'normal') == 'verbose',
      bypass_leader_redirect=options.bypass_leader_redirect)
def scheduler_backup_now(cluster):
  """usage: scheduler_backup_now cluster

  Immediately initiates a full storage backup.
  """
  options = app.get_options()
  check_and_log_response(AuroraClientAPI(CLUSTERS[cluster], options.verbosity).perform_backup())
def increase_quota(cluster, role, cpu_str, ram_str, disk_str): """usage: increase_quota cluster role cpu ram[unit] disk[unit] Increases the amount of production quota allocated to a user. """ cpu = float(cpu_str) ram = parse_data(ram_str) disk = parse_data(disk_str) options = app.get_options() client = AuroraClientAPI(CLUSTERS[cluster], options.verbosity == 'verbose') resp = client.get_quota(role) quota = resp.result.getQuotaResult.quota log.info('Current quota for %s:\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB' % (role, quota.numCpus, quota.ramMb, quota.diskMb)) new_cpu = float(cpu + quota.numCpus) new_ram = int((ram + Amount(quota.ramMb, Data.MB)).as_(Data.MB)) new_disk = int((disk + Amount(quota.diskMb, Data.MB)).as_(Data.MB)) log.info('Attempting to update quota for %s to\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB' % (role, new_cpu, new_ram, new_disk)) resp = client.set_quota(role, new_cpu, new_ram, new_disk) check_and_log_response(resp)
def update(job_spec, config_file):
  """usage: update cluster/role/env/job config

  Performs a rolling upgrade on a running job, using the update configuration
  within the config file as a control for update velocity and failure tolerance.

  Updates are fully controlled client-side, so aborting an update halts the
  update and leaves the job in a 'locked' state on the scheduler.
  Subsequent update attempts will fail until the update is 'unlocked' using the
  'cancel_update' command.

  The updater only takes action on shards in a job that have changed, meaning
  that changing a single shard will only induce a restart on the changed shard.

  You may want to consider using the 'diff' subcommand before updating,
  to preview what changes will take effect.
  """
  options = app.get_options()
  CoreCommandHook.run_hooks("update", options, job_spec, config_file)
  new_cmd = ["job", "update"]
  instance_spec = job_spec
  if options.shards is not None:
    instance_spec = "%s/%s" % (job_spec, ",".join(map(str, options.shards)))
  new_cmd.append(instance_spec)
  new_cmd.append(config_file)
  if options.json:
    new_cmd.append("--read-json")
  if options.health_check_interval_seconds is not None:
    new_cmd.append("--health-check-interval-seconds=%s" % options.health_check_interval_seconds)
  v1_deprecation_warning("update", new_cmd)
  return really_update(job_spec, config_file, options)
def log_dir():
  """Get the current directory into which logs will be written."""
  if LogOptions._LOG_DIR is None:
    LogOptions._LOG_DIR = app.get_options().twitter_common_log_log_dir
  return LogOptions._LOG_DIR
def scribe_host():
  """Get the current host running the scribe daemon."""
  if LogOptions._SCRIBE_HOST is None:
    LogOptions._SCRIBE_HOST = app.get_options().twitter_common_log_scribe_host
  return LogOptions._SCRIBE_HOST
def scribe_log_scheme():
  """Get the current scribe log scheme."""
  if LogOptions._SCRIBE_LOG_SCHEME is None:
    LogOptions.set_scribe_log_level(app.get_options().twitter_common_log_scribe_log_level)
  return LogOptions._SCRIBE_LOG_SCHEME
def scribe_buffer():
  """Get the current buffer setting for scribe logging."""
  if LogOptions._SCRIBE_BUFFER is None:
    LogOptions._SCRIBE_BUFFER = app.get_options().twitter_common_log_scribe_buffer
  return LogOptions._SCRIBE_BUFFER
def scribe_category():
  """Get the current category used when logging to the scribe daemon."""
  if LogOptions._SCRIBE_CATEGORY is None:
    LogOptions._SCRIBE_CATEGORY = app.get_options().twitter_common_log_scribe_category
  return LogOptions._SCRIBE_CATEGORY
def end_maintenance_hosts(cluster):
  """usage: end_maintenance_hosts {--filename=filename | --hosts=hosts}
                                  cluster
  """
  options = app.get_options()
  HostMaintenance(CLUSTERS[cluster], options.verbosity).end_maintenance(
      parse_hosts(options.filename, options.hosts))
def scheduler_snapshot(cluster):
  """usage: scheduler_snapshot cluster

  Request that the scheduler perform a storage snapshot and block until complete.
  """
  options = app.get_options()
  check_and_log_response(AuroraClientAPI(CLUSTERS[cluster], options.verbosity).snapshot())
def scribe_port():
  """Get the current port used to connect to the scribe daemon."""
  if LogOptions._SCRIBE_PORT is None:
    LogOptions._SCRIBE_PORT = app.get_options().twitter_common_log_scribe_port
  return LogOptions._SCRIBE_PORT
def handle_open(scheduler_url, role, env, job):
  url = synthesize_url(scheduler_url, role, env, job)
  if url:
    log.info('Job url: %s' % url)
    if app.get_options().open_browser:
      import webbrowser
      webbrowser.open_new_tab(url)
def inspect(job_spec, config_file):
  """usage: inspect cluster/role/env/job config

  Verifies that a job can be parsed from a configuration file, and displays
  the parsed configuration.
  """
  options = app.get_options()
  newcmd = ["job", "inspect", job_spec, config_file]
  if options.json:
    newcmd.append("--read-json")
  v1_deprecation_warning("inspect", newcmd)

  config = get_job_config(job_spec, config_file, options)
  if options.raw:
    print('Parsed job config: %s' % config.job())
    return

  job = config.raw()
  job_thrift = config.job()
  print('Job level information')
  print('  name: %s' % job.name())
  print('  role: %s' % job.role())
  print('  contact: %s' % job.contact())
  print('  cluster: %s' % job.cluster())
  print('  instances: %s' % job.instances())
  if job.has_cron_schedule():
    print('  cron:')
    print('    schedule: %s' % job.cron_schedule())
    print('    policy: %s' % job.cron_collision_policy())
  if job.has_constraints():
    print('  constraints:')
    for constraint, value in job.constraints().get().items():
      print('    %s: %s' % (constraint, value))
  print('  service: %s' % job_thrift.taskConfig.isService)
  print('  production: %s' % bool(job.production().get()))
  print()

  task = job.task()
  print('Task level information')
  print('  name: %s' % task.name())
  if len(task.constraints().get()) > 0:
    print('  constraints:')
    for constraint in task.constraints():
      print('    %s' % (' < '.join(st.get() for st in constraint.order())))
  print()

  processes = task.processes()
  for process in processes:
    print('Process %s:' % process.name())
    if process.daemon().get():
      print('  daemon')
    if process.ephemeral().get():
      print('  ephemeral')
    if process.final().get():
      print('  final')
    print('  cmdline:')
    for line in process.cmdline().get().splitlines():
      print('    ' + line)
    print()
def scheduler_unload_recovery(cluster):
  """usage: scheduler_unload_recovery cluster

  Unloads a staged recovery.
  """
  options = app.get_options()
  check_and_log_response(AuroraClientAPI(CLUSTERS[cluster], options.verbosity).unload_recovery())
def perform_maintenance_hosts(cluster):
  """usage: perform_maintenance cluster [--filename=filename]
                                        [--hosts=hosts]
                                        [--batch_size=num]
                                        [--post_drain_script=path]
                                        [--grouping=function]

  Asks the scheduler to remove any running tasks from the machine and remove it
  from service temporarily, perform some action on them, then return the machines
  to service.
  """
  options = app.get_options()
  drainable_hosts = parse_hosts(options)

  if options.post_drain_script:
    if not os.path.exists(options.post_drain_script):
      die("No such file: %s" % options.post_drain_script)
    cmd = os.path.abspath(options.post_drain_script)
    drained_callback = lambda host: subprocess.Popen([cmd, host])
  else:
    drained_callback = None

  MesosMaintenance(CLUSTERS[cluster], options.verbosity).perform_maintenance(
      drainable_hosts,
      batch_size=int(options.batch_size),
      callback=drained_callback,
      grouping_function=options.grouping,
  )
def main(args):
  options = app.get_options()

  if options.show_help:
    app.help()

  if options.show_version or options.just_version:
    print('Python NailGun client version 0.0.1', file=sys.stdout)
    if options.just_version:
      sys.exit(0)

  # Assume ng.pex has been aliased to the command name
  command = re.compile('.pex$').sub('', os.path.basename(sys.argv[0]))
  args_index = 0

  # Otherwise the command name is the 1st arg
  if command == 'ng':
    if not args:
      app.help()
    command = args[0]
    args_index = 1

  ng = NailgunClient(host=options.ng_host, port=options.ng_port)
  result = ng(command, *args[args_index:], **os.environ)
  sys.exit(result)
def test_app_add_options_with_Option(self):
  # options.Option
  opt = options.Option('--option1', dest='option1')
  app.add_option(opt)
  app.init(force_args=['--option1', 'option1value', 'extraargs'])
  assert app.get_options().option1 == 'option1value'
  assert app.argv() == ['extraargs']
def restore(j, target):
  """Restore jobs from a config directory"""
  config_dir = app.get_options().config_dir

  if config_dir is None:
    log.error("no config_dir defined.")
    sys.exit()

  if not os.path.exists(os.path.realpath(config_dir)):
    log.error("config path does not exist")
    sys.exit()

  for job in os.listdir(config_dir):
    # here we need to:
    #   check for config.xml
    #   check for job on target server
    #   if job exists, update it
    #   if not, create it
    config_file = "%s/%s/config.xml" % (config_dir, job)
    if not os.path.exists(config_file):
      log.error("config file does not exist: %s" % config_file)
      sys.exit()

    job_xml = read_config(config_file)
    try:
      jobj = j.get_job(job)
      if not jobj.get_config() == job_xml:
        log.info("Updating %s" % job)
        jobj.update_config(job_xml)
    except UnknownJob as e:
      log.error("job doesn't exist. creating")
      j.create_job(job, job_xml)
def scheduler_snapshot(cluster):
  """usage: scheduler_snapshot cluster

  Request that the scheduler perform a storage snapshot and block until complete.
  """
  options = app.get_options()
  check_and_log_response(AuroraClientAPI(CLUSTERS[cluster], options.verbosity).snapshot())
def disk_log_level():
  """Get the current disk_log_level (in logging units specified by the logging module)."""
  if LogOptions._DISK_LOG_LEVEL is None:
    LogOptions.set_disk_log_level(app.get_options().twitter_common_log_disk_log_level)
  return LogOptions._DISK_LOG_LEVEL
def disk_log_scheme():
  """Get the current disk log scheme."""
  if LogOptions._DISK_LOG_SCHEME is None:
    LogOptions.set_disk_log_level(app.get_options().twitter_common_log_disk_log_level)
  return LogOptions._DISK_LOG_SCHEME
def stdout_log_level():
  """Get the current stdout_log_level (in logging units specified by the logging module)."""
  if LogOptions._STDOUT_LOG_LEVEL is None:
    LogOptions.set_stdout_log_level(app.get_options().twitter_common_log_stdout_log_level)
  return LogOptions._STDOUT_LOG_LEVEL
def stdout_log_scheme():
  """Get the current stdout log scheme."""
  if LogOptions._STDOUT_LOG_SCHEME is None:
    LogOptions.set_stdout_log_level(app.get_options().twitter_common_log_stdout_log_level)
  return LogOptions._STDOUT_LOG_SCHEME
def scheduler_stage_recovery(cluster, backup_id):
  """usage: scheduler_stage_recovery cluster backup_id

  Stages a backup for recovery.
  """
  options = app.get_options()
  check_and_log_response(
      AuroraClientAPI(CLUSTERS[cluster], options.verbosity).stage_recovery(backup_id))
def increase_quota(cluster, role, cpu_str, ram_str, disk_str): """usage: increase_quota cluster role cpu ram[unit] disk[unit] Increases the amount of production quota allocated to a user. """ cpu = float(cpu_str) ram = parse_data(ram_str) disk = parse_data(disk_str) options = app.get_options() client = AuroraClientAPI(CLUSTERS[cluster], options.verbosity == "verbose") resp = client.get_quota(role) quota = resp.result.getQuotaResult.quota log.info( "Current quota for %s:\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB" % (role, quota.numCpus, quota.ramMb, quota.diskMb) ) new_cpu = cpu + quota.numCpus new_ram = ram + Amount(quota.ramMb, Data.MB) new_disk = disk + Amount(quota.diskMb, Data.MB) log.info( "Attempting to update quota for %s to\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB" % (role, new_cpu, new_ram.as_(Data.MB), new_disk.as_(Data.MB)) ) resp = client.set_quota(role, new_cpu, new_ram.as_(Data.MB), new_disk.as_(Data.MB)) check_and_log_response(resp)
def setup_function(self):
  options = app.get_options()
  if options.serverset_module_enable:
    self._assert_valid_inputs(options)
    self._construct_serverset(options)
    self._thread = ServerSetJoinThread(self._rejoin_event, self._join)
    self._thread.start()
    self._rejoin_event.set()
def stderr_log_level():
  """Get the current stderr_log_level (in logging units specified by the logging module)."""
  if LogOptions._STDERR_LOG_LEVEL is None:
    LogOptions.set_stderr_log_level(app.get_options().twitter_common_log_stderr_log_level)
  return LogOptions._STDERR_LOG_LEVEL
def scheduler_unload_recovery(cluster):
  """usage: scheduler_unload_recovery cluster

  Unloads a staged recovery.
  """
  options = app.get_options()
  check_and_log_response(
      AuroraClientAPI(CLUSTERS[cluster], options.verbosity).unload_recovery())
def list_pids():
  options = app.get_options()
  pattern = os.path.join(options.hsperfdata_root, 'hsperfdata_*', '*')
  for path in glob.glob(pattern):
    root, pid = os.path.split(path)
    dirname = os.path.basename(root)
    role = dirname[len('hsperfdata_'):]
    yield path, role, int(pid)
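# Minimal sketch of consuming the generator above, assuming app.init() has already parsed the
# hsperfdata_root option; the print format is illustrative only.
for path, role, pid in list_pids():
  print('%s is running JVM pid %d (hsperfdata file: %s)' % (role, pid, path))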
def scribe_port():
  """Get the current port used to connect to the scribe daemon."""
  if LogOptions._SCRIBE_PORT is None:
    LogOptions._SCRIBE_PORT = app.get_options().twitter_common_log_scribe_port
  return LogOptions._SCRIBE_PORT
def main(args, options):
  server = 'ci.makewhat.is:8080'
  if app.get_options().server is not None:
    server = app.get_options().server

  j = Jenkins("http://%s" % server)
  j.server = server

  cmd = parse_arguments(args)
  if cmd.__name__ == 'restore':
    target = app.get_options().master
  else:
    target = app.get_options().job

  cmd(j, target)
def file_provider():
  options = app.get_options()

  def provider():
    with open(options.filename, 'rb') as fp:
      return fp.read()

  return provider
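# Minimal sketch of using the closure above, assuming a --filename option has been registered
# and parsed via app.init(); the variable names are illustrative.
read_blob = file_provider()
blob = read_blob()  # re-reads options.filename on every call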
def scheduler_stage_recovery(cluster, backup_id):
  """usage: scheduler_stage_recovery cluster backup_id

  Stages a backup for recovery.
  """
  options = app.get_options()
  check_and_log_response(
      AuroraClientAPI(CLUSTERS[cluster], options.verbosity).stage_recovery(backup_id))
def disk_log_level():
  """Get the current disk_log_level (in logging units specified by the logging module)."""
  if LogOptions._DISK_LOG_LEVEL is None:
    LogOptions.set_disk_log_level(app.get_options().twitter_common_log_disk_log_level)
  return LogOptions._DISK_LOG_LEVEL
def make_client_factory():
  verbose = getattr(app.get_options(), 'verbosity', 'normal') == 'verbose'

  class TwitterAuroraClientAPI(HookedAuroraClientAPI):
    def __init__(self, cluster, *args, **kw):
      if cluster not in CLUSTERS:
        die('Unknown cluster: %s' % cluster)
      super(TwitterAuroraClientAPI, self).__init__(CLUSTERS[cluster], *args, **kw)

  return functools.partial(TwitterAuroraClientAPI, verbose=verbose)
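# Minimal sketch of using the factory above; the cluster name is an assumed example value.
client_factory = make_client_factory()
api = client_factory('example-cluster')  # dies with 'Unknown cluster' if the name is not in CLUSTERS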
def stderr_log_scheme():
  """Get the current stderr log scheme."""
  if LogOptions._STDERR_LOG_SCHEME is None:
    LogOptions.set_stderr_log_level(app.get_options().twitter_common_log_stderr_log_level)
  return LogOptions._STDERR_LOG_SCHEME
def disk_log_scheme():
  """Get the current disk log scheme."""
  if LogOptions._DISK_LOG_SCHEME is None:
    LogOptions.set_disk_log_level(app.get_options().twitter_common_log_disk_log_level)
  return LogOptions._DISK_LOG_SCHEME