def main(args: argparse.Namespace) -> None:
    """ entry point for the 'backup' subcommand """
    if args.dry_run:
        logger.warning('Running in dry-run mode; no files will be backed up!')

    logger.info(f'Starting backup for {args.name}')
    backup_store = get_backup_store(args.name)

    with backup_store.unlock(dry_run=args.dry_run, preserve_scratch=args.preserve_scratch_dir):
        marked_files: Set[str] = set()
        for base_path in staticconf.read_list('directories', namespace=args.name):
            abs_base_path = os.path.abspath(base_path)
            exclusions = compile_exclusions(
                staticconf.read_list('exclusions', [], namespace=args.name),
            )
            marked_files |= _scan_directory(
                abs_base_path,
                backup_store,
                exclusions,
                args.dry_run,
            )

        for abs_file_name in backup_store.manifest.files() - marked_files:
            logger.info(f'{abs_file_name} has been deleted')
            if not args.dry_run:
                backup_store.manifest.delete(abs_file_name)

    logger.info(f'Backup for {args.name} finished')

def test_create_emr_args(input_date, dev, cores, pipeline_yaml):
    print("just starting")
    load_package_config('config.yaml')
    YamlConfiguration(pipeline_yaml)
    input_prefix = read_list('pipeline.et_step.s3_prefixes')[0]
    input_file = input_prefix + input_date + '/part-*.gz'
    expected_args = EXPECTED_DEV_ARGS if dev else EXPECTED_AWS_ARGS
    expected_out_file = read_string('pipeline.s3_output_prefix')
    delimiter = read_string('redshift_column_delimiter')
    with mock.patch.dict(os.environ, {'LOGNAME': 'testuser', 'YELPCODE': '.'}):
        logname = os.environ['LOGNAME']
        expected_out_file = os.path.join(
            expected_out_file.format(logname=logname),
            input_date,
        )
        extractions = pipeline_yaml_schema_file_path()
        formatted_args = expected_args.format(
            input_file,
            expected_out_file,
            cores,
            extractions,
            delimiter,
        )
        output_under_test = create_emr_args(input_date, 10, input_prefix, dev)
        assert output_under_test == formatted_args

def get_autoscaling_config(config_namespace: str) -> AutoscalingConfig:
    """ Load autoscaling configuration values from the provided config_namespace, falling back to
    the values stored in the default namespace if none are specified.

    :param config_namespace: namespace to read from before falling back to the default namespace
    :returns: AutoscalingConfig object with loaded config values
    """
    default_excluded_resources = staticconf.read_list('autoscaling.excluded_resources', default=[])
    default_setpoint = staticconf.read_float('autoscaling.setpoint')
    default_target_capacity_margin = staticconf.read_float('autoscaling.target_capacity_margin')

    reader = staticconf.NamespaceReaders(config_namespace)
    return AutoscalingConfig(
        excluded_resources=reader.read_list(
            'autoscaling.excluded_resources',
            default=default_excluded_resources,
        ),
        setpoint=reader.read_float('autoscaling.setpoint', default=default_setpoint),
        target_capacity_margin=reader.read_float(
            'autoscaling.target_capacity_margin',
            default=default_target_capacity_margin,
        ),
    )

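
# A minimal, self-contained sketch (not taken from the original codebase) of the
# namespace-fallback pattern used by get_autoscaling_config above.  The namespace
# name 'example_pool_config' and the values below are assumptions for illustration.
import staticconf

staticconf.DictConfiguration({'autoscaling.setpoint': 0.7})         # default namespace value
staticconf.DictConfiguration({}, namespace='example_pool_config')   # pool namespace with no override

default_setpoint = staticconf.read_float('autoscaling.setpoint')
reader = staticconf.NamespaceReaders('example_pool_config')
# The pool namespace defines no setpoint, so the reader falls back to the default (0.7).
assert reader.read_float('autoscaling.setpoint', default=default_setpoint) == 0.7
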
def test_get_metrics(end_time):
    required_metrics = staticconf.read_list(
        'autoscale_signal.required_metrics',
        namespace='bar.mesos_config',
    )
    metrics_client = mock.Mock()
    metrics_client.get_metric_values.side_effect = [
        {'cpus_allocated': [(1, 2), (3, 4)]},
        {'cpus_allocated': [(5, 6), (7, 8)]},
        {'app1,cost': [(1, 2.5), (3, 4.5)]},
    ]
    metrics = get_metrics_for_signal(
        'foo', 'bar', 'mesos', 'app1', metrics_client, required_metrics, end_time,
    )
    assert metrics_client.get_metric_values.call_args_list == [
        mock.call(
            'cpus_allocated',
            SYSTEM_METRICS,
            end_time.shift(minutes=-10).timestamp,
            end_time.timestamp,
            app_identifier='app1',
            extra_dimensions={'cluster': 'foo', 'pool': 'bar'},
            is_regex=False,
        ),
        mock.call(
            'cpus_allocated',
            SYSTEM_METRICS,
            end_time.shift(minutes=-10).timestamp,
            end_time.timestamp,
            app_identifier='app1',
            extra_dimensions={'cluster': 'foo', 'pool': 'bar.mesos'},
            is_regex=False,
        ),
        mock.call(
            'cost',
            APP_METRICS,
            end_time.shift(minutes=-30).timestamp,
            end_time.timestamp,
            app_identifier='app1',
            extra_dimensions={},
            is_regex=False,
        ),
    ]
    assert 'cpus_allocated' in metrics
    assert 'app1,cost' in metrics

def _split_root_prefix(abs_file_name: str, backup_name: str) -> Tuple[str, str]:
    for directory in staticconf.read_list('directories', namespace=backup_name):
        abs_root = os.path.abspath(directory) + os.path.sep
        if abs_file_name.startswith(abs_root):
            return abs_root, abs_file_name[len(abs_root):]
    raise ValueError(f'{abs_file_name} does not start with any directory prefix')

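
# Standalone illustration (backup name and paths are assumptions) of the split that
# _split_root_prefix performs on a POSIX system: the configured directory becomes the
# root prefix, and the remainder of the path is returned relative to it.
staticconf.DictConfiguration({'directories': ['/home/user/photos']}, namespace='example_backup')
root, relative = _split_root_prefix('/home/user/photos/2021/cat.jpg', 'example_backup')
assert (root, relative) == ('/home/user/photos/', '2021/cat.jpg')
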
def _load_module_configs(env_config_path: str):
    staticconf.YamlConfiguration(env_config_path)
    for config in staticconf.read_list('module_config', default=[]):
        if 'file' in config:
            staticconf.YamlConfiguration(config['file'], namespace=config['namespace'])
        staticconf.DictConfiguration(config.get('config', {}), namespace=config['namespace'])
        if 'initialize' in config:
            # 'initialize' is a dotted path such as 'some.module.setup'; import the module
            # and call the named function once its configuration has been loaded.
            path = config['initialize'].split('.')
            function = path.pop()
            module_name = '.'.join(path)
            module = __import__(module_name, globals(), locals(), [path[-1]])
            getattr(module, function)()

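
# Hypothetical example (not taken from a real environment config) of the shape that
# _load_module_configs expects under the 'module_config' key: each entry names a target
# namespace, and may supply a YAML file, an inline 'config' dict, and a dotted
# 'initialize' path to a function that is called after the module's config is loaded.
EXAMPLE_ENV_CONFIG = {
    'module_config': [
        {
            'namespace': 'example_module',          # hypothetical namespace
            'file': 'example_module.yaml',          # hypothetical YAML file with more settings
            'config': {'example.setting': True},    # inline overrides for the same namespace
            'initialize': 'example_module.load',    # imported and called as example_module.load()
        },
    ],
}
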
def s3_to_psv_main(args):
    mrjob = read_string('pipeline.et_step.mrjob')
    stream_name = read_string('pipeline.et_step.s3_to_s3_stream')
    DATABASE = read_string('pipeline.redshift_database')
    LOG_STREAM = PipelineStreamLogger(
        stream_name,
        args.run_local,
        mrjob,
        input_date=args.date,
    )
    day_to_run = setup_dates_to_check(args.date, args.run_local, LOG_STREAM)
    try:
        if not args.run_local:
            setup_private(args.private)

        # Create a psql instance based on args
        if args.skip_progress_in_redshift:
            status_table = DynamoDbStatusTable(LOG_STREAM, run_local=args.run_local)
        else:
            status_table = RedshiftStatusTable(
                RedshiftPostgres(LOG_STREAM, args.private, run_local=args.run_local)
            )

        load_msg = __load_data_from_s3(
            status_table,
            read_list('pipeline.et_step.s3_prefixes'),
            day_to_run,
            mrjob,
            args.run_local,
            DATABASE,
            LOG_STREAM,
            force_et=args.force_et,
        )
        LOG_STREAM.write_msg("complete", extra_msg=load_msg)
    finally:
        clear_env(args.run_local)

def sensu_checkin(
    *,
    check_name: str,
    output: str,
    source: str,
    status: Status = Status.OK,
    app: Optional[str] = None,
    pool: Optional[str] = None,
    scheduler: Optional[str] = None,
    noop: bool = False,
    page: bool = True,
    **kwargs: Any,
) -> None:
    # This function feels like a massive hack, let's revisit and see if we can make it better (CLUSTERMAN-304)
    #
    # TODO (CLUSTERMAN-126) right now there's only one app per pool so use the global pool namespace
    # We assume the "pool" name and the "app" name are the same
    #
    # Use 'no-namespace' instead of None so we don't skip the per-cluster override
    pool_namespace = POOL_NAMESPACE.format(pool=app, scheduler=scheduler) if app else 'no-namespace'

    # read the sensu configuration from srv-configs; signals are not required to define this, so in the case
    # that they do not define anything, we fall back to the clusterman config.  The clusterman config can
    # override alerts on a per-cluster basis, so first check there; if nothing is defined there, fall back
    # to the default, which is required to be defined, so we know that someone is going to get the notification
    sensu_config = dict(staticconf.read_list('sensu_config', default=[{}], namespace=pool_namespace).pop())
    if not sensu_config:
        sensu_config = dict(staticconf.read_list(f'clusters.{source}.sensu_config', default=[{}]).pop())
    if not sensu_config:
        sensu_config = dict(staticconf.read_list('sensu_config').pop())

    # If we've turned off paging in the config, we don't want this function to ever page
    config_page = sensu_config.pop('page', None)
    page = False if config_page is False else page

    # So we know where alerts are coming from precisely
    output += ''.join([
        '\n\nThis check came from:\n',
        f'- Cluster/region: {source}\n',
        f'- Pool: {pool}.{scheduler}\n' if pool else '',
        f'- App: {app}\n' if app else '',
    ])

    sensu_config.update({
        'name': check_name,
        'output': output,
        'source': source,
        'status': status.value,
        'page': page,
    })
    # values passed in to this function override config file values (is this really correct??)
    sensu_config.update(kwargs)

    pysensu_yelp = _get_sensu()
    if noop or not pysensu_yelp:
        logger.info((
            'Would have sent this event to Sensu:\n'
            f'{pprint.pformat(sensu_config)}'
        ))
        return

    # team and runbook are required entries in srv-configs, so we know this will go to the "right" place
    pysensu_yelp.send_event(**sensu_config)
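
# Hedged illustration of the lookup order implemented in sensu_checkin; every name and
# value below is an assumption for the example, not taken from real srv-configs.  The
# first non-empty entry wins: a pool-level config beats the per-cluster override, which
# beats the required global default (where 'team' and 'runbook' must be defined).
EXAMPLE_POOL_NAMESPACE_CONFIG = {                 # e.g. namespace 'example_app.mesos_config'
    'sensu_config': [{'team': 'example-team', 'page': False}],
}
EXAMPLE_DEFAULT_NAMESPACE_CONFIG = {
    'clusters': {
        'example-cluster': {
            'sensu_config': [{'team': 'example-team'}],           # per-cluster override
        },
    },
    'sensu_config': [                                             # required global default
        {'team': 'example-team', 'runbook': 'https://example.com/runbook'},
    ],
}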