def merge_params(self, arg_dict, tuner_cfg_dict):
    """
    Extracts all Spark direct and conf parameters from program arguments
    and from the OpenTuner config dict, and merges them with their
    respective Spark default parameters. The function assumes that all
    configurable parameters (i.e. range types) in the arg_dict are
    overwritten by specific param values in tuner_cfg_dict.

    :param arg_dict: program argument dict that maps a program flag to a
        corresponding SparkParamType
    :param tuner_cfg_dict: OpenTuner config dict, which maps a program flag
        to a corresponding SparkParamType that is guaranteed to be a
        non-range value
    :return: a tuple of two dicts, the first containing all Spark direct
        parameters and the second containing all Spark conf parameters.
        The keys of both are Spark parameter names, not program flags.
    """
    input_direct_params = {}
    input_conf_params = {}

    # Extract direct and conf params from the input dicts.
    # Note the order: tuner_cfg_dict takes precedence over arg_dict
    # to ensure that all configurable parameters (i.e. range types)
    # in the arg_dict are overwritten by specific param values.
    # TODO Might want to assert:
    # type(param) is SparkParamType and type(param.value) is not tuple
    input_dict = dict(ChainMap({}, tuner_cfg_dict, arg_dict))
    for flag, param in input_dict.items():
        param_val = param.value
        # To ensure that we explicitly specify memory units - lest
        # Spark/YARN misinterpret the input - we use `Util.format_size`
        # here to 'round' all values to kibibytes. For general units,
        # there is a small risk that the rounding here - done outside of
        # the OpenTuner configuration - may throw off any underlying
        # optimization algorithm.
        # TODO figure out when rounding here might cause issues
        if isinstance(param, SparkMemoryType):
            param_val = Util.format_size(param_val, 'k')
        if flag in FLAG_TO_DIRECT_PARAM:
            input_direct_params[param.spark_name] = param_val
        elif flag in FLAG_TO_CONF_PARAM:
            input_conf_params[param.spark_name] = param_val

    # Merge the input dicts with the defaults.
    direct_param_default = SparkParamType.get_value_map(
        self.direct_param_default)
    direct_params = ChainMap({}, input_direct_params, direct_param_default)
    conf_defaults = SparkParamType.get_value_map(self.conf_defaults)
    conf_params = ChainMap({}, input_conf_params, conf_defaults)
    return dict(direct_params), dict(conf_params)

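# A minimal sketch (not from the original source) of the precedence that
# merge_params relies on: ChainMap searches its maps left to right, so an
# explicit tuner value shadows the range placeholder from the argument dict,
# which in turn shadows the Spark default. All names and values below are
# hypothetical.
from collections import ChainMap

tuner_cfg = {'executor-memory': '4096k'}      # concrete value chosen by OpenTuner
arg_cfg = {'executor-memory': (1024, 8192)}   # range placeholder from program args
defaults = {'executor-memory': '1024k', 'driver-memory': '1024k'}

merged = dict(ChainMap({}, tuner_cfg, arg_cfg, defaults))
assert merged['executor-memory'] == '4096k'   # tuner value wins over the arg range
assert merged['driver-memory'] == '1024k'     # default fills in the missing key
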
def get_app_spec(app, context_processors=None):
    if isinstance(app, six.string_types):
        return ChainMap({
            'path': app,
            'context_processors': context_processors
        }, _app_spec_defaults)
    else:
        if 'path' not in app:
            raise ValueError(
                'Each app specified must be a string or a dictionary containing a path'
            )
        app.setdefault('context_processors', []).extend(context_processors or [])
        return ChainMap(app, _app_spec_defaults)

def aggregate_and_send_metrics(url, app_name, instance_id, custom_headers,
                               features, ondisk_cache):
    feature_stats_list = []
    for feature_name in features.keys():
        feature_stats = {
            features[feature_name].name: {
                "yes": features[feature_name].yes_count,
                "no": features[feature_name].no_count
            }
        }
        features[feature_name].reset_stats()
        feature_stats_list.append(feature_stats)

    metrics_request = {
        "appName": app_name,
        "instanceId": instance_id,
        "bucket": {
            "start": ondisk_cache[METRIC_LAST_SENT_TIME].isoformat(),
            "stop": datetime.now(timezone.utc).isoformat(),
            "toggles": dict(ChainMap(*feature_stats_list))
        }
    }

    send_metrics(url, metrics_request, custom_headers)
    ondisk_cache[METRIC_LAST_SENT_TIME] = datetime.now(timezone.utc)
    ondisk_cache.sync()

def print_sessions(results, country):
    if country == 'United States,ga:country==Canada':
        country = 'US'
    elif country == 'United Kingdom':
        country = 'UK'
    results = results.get('rows')

    def new_result(result):
        return {"option": result[1], result[0]: result[2]}

    results = itertools.groupby(
        sorted(list(map(new_result, results)), key=itemgetter('option')),
        key=lambda x: x['option'])
    result = []
    for key, item in results:
        result.append(dict(ChainMap(*list(item) + [{'Country': country}])))

    key_lst = ['Referral', 'Direct', 'Social', 'Organic Search',
               'Paid Search', 'Country', 'option']

    def merge_email(a):
        keys = list(a.keys())
        Email = 0
        for key in keys:
            if key not in key_lst:
                Email += int(a[key])
                del a[key]
        a['Email'] = str(Email)
        return a

    result = list(map(merge_email, result))
    return result

def args_from_node(cls, node, overrides=None, defaults=None):
    if overrides is None:
        overrides = {}
    if defaults is None:
        defaults = {}
    params = ChainMap(overrides, node, defaults)
    return make_dict_from_map(params, cls.get_arg_key_map())

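# A minimal, hypothetical illustration of the overrides > node > defaults
# layering used by args_from_node above: ChainMap consults each dict in order,
# so a per-call override wins over the node's own value, which wins over the
# class-level default. The keys and values below are made up for the example.
from collections import ChainMap

node = {'user': 'deploy', 'ip': '10.0.0.5'}
defaults = {'ssh_port': 22}
params = ChainMap({'user': 'root'}, node, defaults)
assert params['user'] == 'root'      # override shadows the node value
assert params['ip'] == '10.0.0.5'    # falls through to the node
assert params['ssh_port'] == 22      # falls through to the defaults
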
def print_Trafiic(results):
    results = results.get('rows')

    def new_result(result):
        return {"option": result[0], 'traffic': result[1]}

    results = itertools.groupby(
        sorted(list(map(new_result, results)), key=itemgetter('option')),
        key=lambda x: x['option'])
    result = []
    for key, item in results:
        result.append(dict(ChainMap(*list(item))))
    return result

def print_sourceTraffic(results):
    results = results.get('rows')

    def new_result(result):
        return {
            "option": result[1],
            'Email' if result[0] == '(Other)' else result[0]: result[2]
        }

    results = itertools.groupby(
        sorted(list(map(new_result, results)), key=itemgetter('option')),
        key=lambda x: x['option'])
    result = []
    for key, item in results:
        i = [j for j in item]
        result.append(dict(ChainMap(*list(i))))
    return result

def print_conversions(results):
    def new_result(result):
        return {
            "option": result[1],
            'Email' if result[0] == '(Other)' else result[0]: result[2]
        }

    results = itertools.groupby(
        sorted(list(map(new_result, results)), key=itemgetter('option')),
        key=lambda x: x['option'])
    result = []
    for key, item in results:
        result.append(dict(ChainMap(*list(item))))
    return result

class GeppettoResource(JsonResource):
    packages = [
        eClassifiers, datasources.eClassifiers, types.eClassifiers,
        values.eClassifiers, variables.eClassifiers
    ]
    chain = ChainMap(*packages)

    def serialize_eclass(self, eclass):
        return eclass.name

    @lru_cache()
    def resolve_eclass(self, uri):
        return self.chain.get(uri)

def _get_section_env_vars(self, section):
    section_env_prefix = 'OASIS_API_{}_'.format(section.upper())
    global_env_prefix = 'OASIS_API_'
    return ChainMap(
        {
            k.replace(section_env_prefix, ''): v
            for k, v in os.environ.items()
            if k.startswith(section_env_prefix)
        },
        {
            k.replace(global_env_prefix, ''): v
            for k, v in os.environ.items()
            if k.startswith(global_env_prefix)
        },
    )

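# A hypothetical illustration (not from the original source) of the env-var
# layering above: once both prefixes are stripped down to the same key, a
# section-specific variable (OASIS_API_SERVER_PORT) shadows the global
# fallback (OASIS_API_PORT). The variable names are made up for the example.
import os
from collections import ChainMap

os.environ.update({'OASIS_API_PORT': '8000', 'OASIS_API_SERVER_PORT': '9000'})
section_prefix, global_prefix = 'OASIS_API_SERVER_', 'OASIS_API_'
env = ChainMap(
    {k.replace(section_prefix, ''): v for k, v in os.environ.items()
     if k.startswith(section_prefix)},
    {k.replace(global_prefix, ''): v for k, v in os.environ.items()
     if k.startswith(global_prefix)},
)
assert env['PORT'] == '9000'  # the SERVER-scoped value wins over the global one
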
def args_from_node(cls, node, overrides=None, defaults=None):
    if overrides is None:
        overrides = {}
    if defaults is None:
        defaults = {}
    params = ChainMap(overrides, node, defaults)
    return {
        'user': params['user'],
        'host': params['ip'],
        'port': params.get('ssh_port', cls.SSH_PORT),
        'pkey': params.get('pkey'),
        'key_filename': params.get('key_filename'),
        'password': params.get('password'),
        'name': params.get('name'),
    }

def getArguments():
    # Program internal settings
    # I know that it is slower to load this way, but it is more explicit
    # and readable in my opinion.
    program_defaults = {}
    program_defaults['debug'] = 'False'
    program_defaults['group_id'] = 'com.dell.cpsd'
    program_defaults['maven_dependency_plugin_version'] = '3.0.2'
    program_defaults['dependency_tree_output_file'] = 'dependency_tree'

    # Property file settings
    property_file_name = os.path.splitext(
        os.path.basename(__file__))[0] + '.props'
    property_file_path = os.path.realpath(
        os.path.join(
            os.getcwd(),
            os.path.dirname(property_file_name))) + os.sep + property_file_name
    property_file_properties = {}
    # If no property file exists, don't sweat it, just keep going.
    try:
        config = ConfigParser()
        with open(property_file_path) as stream:
            stream = StringIO("[root]\n" + stream.read())
            config.readfp(stream)
            property_file_properties = dict(config.items('root'))
    except IOError:
        pass

    # Command line settings
    parser = argparse.ArgumentParser()
    parser.add_argument('-db', '--debug', help='Ibid. Defaults to False')
    parser.add_argument('-gid', '--group_id',
                        help='Ibid. Defaults to com.dell.cpsd')
    parser.add_argument('-mpv', '--maven_dependency_plugin_version',
                        help='Ibid. Defaults to 3.0.2')
    parser.add_argument('-dtof', '--dependency_tree_output_file',
                        help='Ibid. Defaults to dependency_tree')
    namespace = parser.parse_args()

    # Create a dictionary of the given parser command line inputs.
    command_line_args = {k: v for k, v in vars(namespace).items() if v}

    # Now create a ChainMap of all the dictionaries in the order of precedence.
    return ChainMap(command_line_args, os.environ, property_file_properties,
                    program_defaults)

def from_node(cls, node, overrides=None, defaults=None):
    if overrides is None:
        overrides = {}
    if defaults is None:
        defaults = {}
    params = ChainMap(overrides, node, defaults)
    return cls(
        user=params['user'],
        host=params['ip'],
        # paramiko doesn't like a None default; it requires SSH_PORT
        port=params.get('ssh_port', SSH_PORT),
        pkey=params.get('pkey'),
        key_filename=params.get('key_filename'),
        password=params.get('password'),
        name=params.get('name'))

def splits(cls, config):
    folder = config["data_folder"]
    wanted_words = config["wanted_words"]
    unknown_prob = config["unknown_prob"]
    train_pct = config["train_pct"]
    dev_pct = config["dev_pct"]
    test_pct = config["test_pct"]
    snr = config["snr"]

    words = {word: i + 2 for i, word in enumerate(wanted_words)}
    words.update({cls.LABEL_SILENCE: 0, cls.LABEL_UNKNOWN: 1})
    sets = [{}, {}, {}]
    unknowns = [0] * 3
    bg_noise_files = []
    unknown_files = []
    wav_path = []

    print("snr:{}".format(snr))
    path_listname = ['/home/guyue/CNNProgram/datalist/snr', snr, '.lst']
    path_listname = ''.join(path_listname)
    with open(path_listname) as f:
        for line in f.readlines():
            data = line.split(' ')
            wav_name = data[0][:]
            wordname = data[1][0:-1]
            if wordname in words:
                label = words[wordname]
            else:
                label = words[cls.LABEL_UNKNOWN]
            if label == words[cls.LABEL_UNKNOWN]:
                unknown_files.append(wav_name)
                continue
            if config["group_speakers_by_id"]:
                hashname = re.sub(r"_nohash_.*$", "", wav_name)
                hashname = hashname.split('/')
                hashname = hashname[-1][:]
            max_no_wavs = 2**27 - 1
            bucket = int(hashlib.sha1(hashname.encode()).hexdigest(), 16)
            bucket = (bucket % (max_no_wavs + 1)) * (100. / max_no_wavs)
            if bucket < dev_pct:
                tag = DatasetType.DEV
            elif bucket < test_pct + dev_pct:
                tag = DatasetType.TEST
            else:
                tag = DatasetType.TRAIN
            sets[tag.value][wav_name] = label

    for tag in range(len(sets)):
        unknowns[tag] = int(unknown_prob * len(sets[tag]))
    random.shuffle(unknown_files)
    a = 0
    for i, dataset in enumerate(sets):
        b = a + unknowns[i]
        unk_dict = {u: words[cls.LABEL_UNKNOWN] for u in unknown_files[a:b]}
        dataset.update(unk_dict)
        a = b

    train_cfg = ChainMap(dict(bg_noise_files=bg_noise_files), config)
    test_cfg = ChainMap(dict(bg_noise_files=bg_noise_files, noise_prob=0), config)
    datasets = (cls(sets[0], DatasetType.TRAIN, train_cfg),
                cls(sets[1], DatasetType.DEV, test_cfg),
                cls(sets[2], DatasetType.TEST, test_cfg))
    return datasets

def splits(cls, config):
    folder = config["data_folder"]  # data/speech_dataset
    # e.g. ['yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go']
    wanted_words = config["wanted_words"]
    unknown_prob = config["unknown_prob"]  # 0.1
    train_pct = config["train_pct"]  # 80
    dev_pct = config["dev_pct"]  # 10
    test_pct = config["test_pct"]  # 10

    # {'yes': 2, 'no': 3, 'up': 4, 'down': 5, 'left': 6, 'right': 7,
    #  'on': 8, 'off': 9, 'stop': 10, 'go': 11}
    words = {word: i + 2 for i, word in enumerate(wanted_words)}
    words.update({cls.LABEL_SILENCE: 0, cls.LABEL_UNKNOWN: 1})
    sets = [{}, {}, {}]
    unknowns = [0] * 3
    bg_noise_files = []
    unknown_files = []

    for folder_name in os.listdir(folder):
        path_name = os.path.join(folder, folder_name)  # data/speech_dataset/yes
        is_bg_noise = False
        if os.path.isfile(path_name):
            continue
        if folder_name in words:
            label = words[folder_name]
        elif folder_name == "_background_noise_":
            is_bg_noise = True
        else:
            label = words[cls.LABEL_UNKNOWN]

        for filename in os.listdir(path_name):
            # e.g. data/speech_dataset/down/00b01445_nohash_1.wav
            wav_name = os.path.join(path_name, filename)
            if is_bg_noise and os.path.isfile(wav_name):
                bg_noise_files.append(wav_name)
                continue
            elif label == words[cls.LABEL_UNKNOWN]:
                # here the one/four folders are the UNKNOWN words
                unknown_files.append(wav_name)
                continue
            if config["group_speakers_by_id"]:
                hashname = re.sub(r"_nohash_.*$", "", filename)
            max_no_wavs = 2**27 - 1
            # hexdigest() returns a base-16 string
            bucket = int(hashlib.sha1(hashname.encode()).hexdigest(), 16)
            bucket = (bucket % (max_no_wavs + 1)) * (100. / max_no_wavs)
            if bucket < dev_pct:
                tag = DatasetType.DEV  # TRAIN = 0, DEV = 1, TEST = 2
            elif bucket < test_pct + dev_pct:  # dev_pct = 10, test_pct = 10, train_pct = 80
                tag = DatasetType.TEST
            else:
                tag = DatasetType.TRAIN
            if config["type"] == "eval":
                sets[2][wav_name] = label
            elif config["type"] == "train":
                sets[tag.value][wav_name] = label

    # sets = [
    #     train {'00b01445_nohash_1': 1, ...},  length = 16696
    #     dev   {'00b01443_nohash_1': 2, ...},  length = 2316
    #     test  {'00b01441_nohash_1': 3, ...},  length = 2311
    # ]
    for tag in range(len(sets)):
        # train length, validation, test
        unknowns[tag] = int(unknown_prob * len(sets[tag]))
    random.shuffle(unknown_files)
    a = 0
    for i, dataset in enumerate(sets):
        b = a + unknowns[i]
        unk_dict = {u: words[cls.LABEL_UNKNOWN] for u in unknown_files[a:b]}
        dataset.update(unk_dict)
        a = b
    # unk_dict = {
    #     0: len(train_dataset) - 1,
    #     len(train_dataset): len(train + dev_dataset) - 1,
    #     len(train + dev): len(train + dev + test) - 1,
    # }

    train_cfg = ChainMap(dict(bg_noise_files=bg_noise_files), config)
    test_cfg = ChainMap(dict(bg_noise_files=bg_noise_files, noise_prob=0), config)
    # print(test_cfg)
    datasets = (cls(sets[0], DatasetType.TRAIN, train_cfg),
                cls(sets[1], DatasetType.DEV, test_cfg),
                cls(sets[2], DatasetType.TEST, config))
    return datasets

from chainmap import ChainMap

x = ChainMap()
try:
    x[5]
except KeyError:
    pass

a = {"a": 4}
b = {"b": 5}
x = ChainMap((a, b))
assert x["a"] == 4
assert x["b"] == 5
a["b"] = 6
b["c"] = 7
assert x["b"] == 6
assert x["c"] == 7

x = ChainMap()
assert x.maps == [{}]
x.maps = [{"a": 4}]
assert x["a"] == 4

x = ChainMap((1, ))
try:
    x[5]
except TypeError as e:
    assert e.args[0] == "'int' object is not subscriptable"

def transform_templates(self):
    empty = dict()
    try:
        global_context = ChainMap(self.data)
        for page in self.get_all_pages():
            data_key, ext = splitext(page.file_path)
            # The context is the complete set of variables the template
            # will be able to reference.
            #
            # There are automatic globals like 'root' and 'page'.
            #
            # There are variables scoped to the file, matched by name. So
            # if there is a file 'foo.yml' containing 'title=bar' then within
            # the template 'foo.html', the variable 'title' will be defined
            # and set to the string 'bar'.
            file_variables = self.data.get(data_key) or empty
            # print("XXX Data for %s:" % data_key)
            # pprint.pprint(file_variables, sys.stdout)
            generator = file_variables.get('generator')
            if generator:
                # print("XXX GENERATOR %s:" % data_key)
                # pprint.pprint(generator, sys.stdout)
                data_file = generator.get('data_file')
                iteration_list_key = generator.get('iteration_list_key')
                iteration_item_key = generator.get('iteration_item_key', 'item')
                output_filename = generator.get('output_filename')
                if data_file:
                    generator_data = self.data.get(data_file)
                    if not generator_data:
                        raise SetupError(
                            '%s generator data_file "%s" not found. Keys: %s' %
                            (page.file_path, data_file, self.data.keys()))
                else:
                    generator_data = file_variables

                if iteration_list_key:
                    iteration_list = generator_data.get(iteration_list_key)
                    if not iteration_list:
                        raise SetupError(
                            '%s generator could not find key "%s" in generator data' %
                            (page.file_path, iteration_list_key))
                # print("XXX ROOT %s:" % data_key)
                # pprint.pprint(iteration_list, sys.stdout)

                if not output_filename:
                    raise SetupError(
                        '%s generator did not include output_filename' %
                        (page.file_path,))
                page_name_template = self.setup.jinja.from_string(output_filename)

                for iteration_item in iteration_list:
                    # print("XXX ITERATION ITEM")
                    # pprint.pprint(iteration_item, sys.stdout)
                    # automatic_variables = dict(
                    #     page=page.file_path,
                    #     root=page.relative_root_path,
                    # )
                    context = global_context.new_child({
                        iteration_item_key: iteration_item,
                        iteration_list_key: iteration_list,
                        **file_variables,
                    })  # .new_child(file_variables)
                    page_name = page_name_template.render(context)
                    # print("XXX page_name_template=[%s] -> page_name=[%s]"
                    #       % (output_filename, page_name))
                    page.write(
                        out_path=join(self.setup.dist_dir, page_name),
                        context=context  # global_context.new_child(file_variables),
                    )
            else:
                # No generator.
                page.write(
                    out_path=join(self.setup.dist_dir, page.output_file_path),
                    context=global_context.new_child(file_variables),
                )
            # Ensure the "pages" part of the path is trimmed, so:
            #   "pages/index.html"     -> ".../dist/index.html"
            #   "pages/about/foo.html" -> ".../dist/about/foo.html"
            # out_path = join(self.config.dist_dir, page.output_file_path)
    except jinja2.exceptions.TemplateSyntaxError as tse:
        self.setup.log.error("%s:%s: %s %s" %
                             (tse.filename, tse.lineno, tse.name, tse.message))
        sys.exit(1)

            (t.get('FMLevelName'), t.get('FMLevel'))
            for t in itertools.chain(
                six.itervalues(canonical_exposures_profile_simple),
                six.itervalues(canonical_accounts_profile))
        ) if t != (None, None)],
        key=lambda t: t[1])
)

fm_term_types = tuple(FM_TERMS[k]['desc'] for k in FM_TERMS)

fm_profile_types = ('acc', 'loc',)

keys_status_flags = tuple(OASIS_KEYS_STATUS[k]['id'] for k in OASIS_KEYS_STATUS)

peril_ids = tuple(OASIS_PERILS[k]['id'] for k in OASIS_PERILS)

oed_peril_ids = tuple(OED_PERILS[k]['id'] for k in OED_PERILS)

# Used simple echo command rather than ktools conversion utility for testing purposes
ECHO_CONVERSION_INPUT_FILES = {
    k: ChainMap({'conversion_tool': 'echo'}, v) for k, v in INPUT_FILES.items()
}


def standard_input_files(min_size=0):
    return lists(
        sampled_from([
            target['name'] for target in chain(
                six.itervalues(GUL_INPUT_FILES),
                six.itervalues(OPTIONAL_INPUT_FILES))
        ]),
        min_size=min_size,
        unique=True,
    )


def il_input_files(min_size=0):
    return lists(
        sampled_from([target['name'] for target in six.itervalues(IL_INPUT_FILES)]),
        min_size=min_size,
        unique=True,
    )

def getArguments():
    # Program internal settings
    # I know that it is slower to load this way, but it is more explicit
    # and readable in my opinion.
    program_defaults = {}
    program_defaults['github_url'] = 'https://github.com'
    program_defaults['github_organization'] = 'dellemc-symphony'
    program_defaults['giteos2_url'] = 'https://eos2git.cec.lab.emc.com'
    program_defaults['giteos2_organization'] = 'VCE-Symphony'
    program_defaults['giteos2_certs'] = '/opt/security/EMC_CA_GIT_HUB_Combo.pem'
    program_defaults['root_parent_version'] = '1.1.0'
    program_defaults['git_branch'] = 'master'

    # Property file settings
    property_file_name = os.path.splitext(
        os.path.basename(__file__))[0] + '.props'
    property_file_path = os.path.realpath(
        os.path.join(
            os.getcwd(),
            os.path.dirname(property_file_name))) + os.sep + property_file_name
    property_file_properties = {}
    # If no property file exists, don't sweat it, just keep going.
    try:
        config = ConfigParser()
        with open(property_file_path) as stream:
            stream = StringIO("[root]\n" + stream.read())
            config.readfp(stream)
            property_file_properties = dict(config.items('root'))
    except IOError:
        pass

    # Command line settings
    parser = argparse.ArgumentParser()
    parser.add_argument('-gu', '--github_username',
                        help='User name associated with Github account.')
    parser.add_argument('-gp', '--github_password',
                        help='Password associated with Github account')
    parser.add_argument('-gt', '--github_authtoken',
                        help='Authentication token associated with Github account.')
    parser.add_argument('-go', '--github_organization',
                        help='Github source organization. Default: ' +
                        program_defaults['github_organization'])
    parser.add_argument('-eos2url', '--giteos2_url',
                        help='eos2 git URL. Default: ' +
                        program_defaults['giteos2_url'])
    parser.add_argument('-eos2u', '--giteos2_username',
                        help='User name associated with eos2 account.')
    parser.add_argument('-eos2p', '--giteos2_password',
                        help='Password associated with eos2 account')
    parser.add_argument('-eos2t', '--giteos2_authtoken',
                        help='Authentication token associated with eos2 account.')
    parser.add_argument('-eos2o', '--giteos2_organization',
                        help='eos2 source organization. Default: ' +
                        program_defaults['giteos2_organization'])
    parser.add_argument('-rpv', '--root_parent_version',
                        help='The root-parent version used in the generated '
                             'maven parent pom.xml.')
    parser.add_argument('-gb', '--git_branch',
                        help='The git branch that should be checked out in '
                             'each repository.')
    namespace = parser.parse_args()

    # Create a dictionary of the given parser command line inputs.
    command_line_args = {k: v for k, v in vars(namespace).items() if v}

    # Now create a ChainMap of all the dictionaries in the order of precedence.
    return ChainMap(command_line_args, os.environ, property_file_properties,
                    program_defaults)

def lambda_handler(event, context, debug=False):
    missing = [key for key in ENV_KEYS if key not in os.environ]
    if missing:
        print('Missing required environment keys:', ', '.join(missing))
        return

    if debug:
        import sys
        sys.path.insert(
            0, os.path.join(os.path.dirname(__file__), 'dependencies'))
        print(os.path.join(os.path.dirname(__file__), 'dependencies'))

    from github3 import login

    if 'Records' in event:
        # SNS
        if VERBOSE:
            event_type = event['Records'][0]['Sns']['MessageAttributes'][
                'X-Github-Event']['Value']
            print(event_type + ': ' + event['Records'][0]['Sns']['Message'])
        message = json.loads(event['Records'][0]['Sns']['Message'])
    else:
        # API
        message = event
        if VERBOSE:
            print('API: ' + json.dumps(event, indent=2))

    if 'pull_request' not in message:
        print('Not a PR event. Aborting')
        return

    action = message.get('action')
    pr_id = message.get('number')
    if action not in ('opened', 'synchronize'):
        print('Not handling {} action for Pull Request {}'.format(action, pr_id))
        return

    author = message['pull_request']['user']['login']
    base_repo_owner = message['pull_request']['base']['repo']['owner']['login']
    base_repo = message['pull_request']['base']['repo']['name']
    base_repo_full_name = message['pull_request']['base']['repo']['full_name']
    head_repo_owner = message['pull_request']['head']['repo']['owner']['login']
    head_repo = message['pull_request']['head']['repo']['name']
    head_sha = message['pull_request']['head']['sha']
    base_branch = message['pull_request']['base']['ref']
    head_branch = message['pull_request']['head']['ref']

    if base_repo_full_name.lower() not in config.repos:
        print("Got event for unexpected repo {}".format(base_repo_full_name))
        return

    repo_config = ChainMap(config.repos[base_repo_full_name.lower()],
                           config.default, EMPTY_REPO_CONFIG)

    if base_branch in repo_config['ignore_base_branch']:
        print('PR {} is targeting {} branch, aborting'.format(pr_id, base_branch))
        return

    if author in repo_config['ignore_login']:
        print('Ignoring pull request {} from {}'.format(pr_id, author))
        return

    gh = login(os.environ['GH_USER'], password=os.environ['GH_TOKEN'])
    issue = gh.issue(base_repo_owner, base_repo, pr_id)
    pr = gh.pull_request(base_repo_owner, base_repo, pr_id)
    head_repo = gh.repository(head_repo_owner, head_repo)
    head_commit = head_repo.commit(head_sha)
    files_changed = pr.files()
    current_labels = set(str(l) for l in issue.original_labels)

    # Calculate which labels to add and remove

    # Team Labels
    label_tests = {
        label: (author in users)
        for label, users in repo_config['team_labels'].items()
    }

    # File Pattern Labels
    for label, patterns in repo_config['file_pattern_labels'].items():
        label_tests[label] = False
        if isinstance(patterns, str):
            patterns = [patterns]
        for pattern in patterns:
            if isinstance(pattern, str):
                match = any(
                    fnmatch(pfile.filename, pattern) for pfile in files_changed)
            else:
                match = any(
                    pattern.match(pfile.filename) is not None
                    for pfile in files_changed)
            if match:
                label_tests[label] = True
                break
        if label_tests[label]:
            continue

    # Base Branch Labels
    label_tests.update({
        label: fnmatch(base_branch, pattern) or label_tests.get(label, False)
        for label, pattern in repo_config['base_branch_labels'].items()
    })

    # Head Branch Labels
    label_tests.update({
        label: fnmatch(head_branch, pattern) or label_tests.get(label, False)
        for label, pattern in repo_config['head_branch_labels'].items()
    })

    # Find labels to remove:
    remove_labels = current_labels & set(
        label for label, to_add in label_tests.items() if not to_add)

    # Labels to add:
    add_labels = (set(lab for lab, to_add in label_tests.items() if to_add) -
                  current_labels)

    # New set of labels:
    new_labels = (current_labels - remove_labels) | add_labels

    if new_labels != current_labels:
        print('Changing labels on PR#{0}.'.format(pr.number))
        if add_labels:
            print('Adding {0}'.format(', '.join(add_labels)))
        if remove_labels:
            print('Removing {0}'.format(','.join(remove_labels)))
        if not debug:
            if add_labels:
                issue.add_labels(*add_labels)
            for label in remove_labels:
                issue.remove_label(label)

    if repo_config['commit_status']:
        repo = gh.repository(base_repo_owner, base_repo)
        current_statuses = set(status.context for status in head_commit.statuses())
        for context, description in repo_config['commit_status'].items():
            if context in current_statuses:
                print('Skipping setting commit status {}, already set.'.format(context))
            elif debug:
                print('Setting {} status {} to {}: {}'.format(
                    head_commit.sha, context, 'pending', description))
            else:
                repo.create_status(head_commit.sha, 'pending',
                                   context=context, description=description)

    print('Handled pull request {}'.format(pr_id))

def __init__(self, *maps):
    ChainMap.__init__(self, *maps)

def splits(cls, config):
    folder = config["data_folder"]
    wanted_words = config["wanted_words"]
    unknown_prob = config["unknown_prob"]
    train_pct = config["train_pct"]
    dev_pct = config["dev_pct"]
    test_pct = config["test_pct"]

    words = {word: i + 2 for i, word in enumerate(wanted_words)}
    words.update({cls.LABEL_SILENCE: 0, cls.LABEL_UNKNOWN: 1})
    sets = [{}, {}, {}]
    unknowns = [0] * 3
    bg_noise_files = []
    unknown_files = []

    for folder_name in os.listdir(folder):
        path_name = os.path.join(folder, folder_name)
        is_bg_noise = False
        if os.path.isfile(path_name):
            continue
        if folder_name in words:
            label = words[folder_name]
        elif folder_name == "_background_noise_":
            is_bg_noise = True
        else:
            label = words[cls.LABEL_UNKNOWN]

        for filename in os.listdir(path_name):
            wav_name = os.path.join(path_name, filename)
            if is_bg_noise and os.path.isfile(wav_name):
                bg_noise_files.append(wav_name)
                continue
            elif label == words[cls.LABEL_UNKNOWN]:
                unknown_files.append(wav_name)
                continue
            if config["group_speakers_by_id"]:
                hashname = re.sub(r"_nohash_.*$", "", filename)
            max_no_wavs = 2**27 - 1
            bucket = int(hashlib.sha1(hashname.encode()).hexdigest(), 16)
            bucket = (bucket % (max_no_wavs + 1)) * (100. / max_no_wavs)
            if bucket < dev_pct:
                tag = DatasetType.DEV
            elif bucket < test_pct + dev_pct:
                tag = DatasetType.TEST
            else:
                tag = DatasetType.TRAIN
            sets[tag.value][wav_name] = label

    for tag in range(len(sets)):
        unknowns[tag] = int(unknown_prob * len(sets[tag]))
    random.shuffle(unknown_files)
    a = 0
    for i, dataset in enumerate(sets):
        b = a + unknowns[i]
        unk_dict = {u: words[cls.LABEL_UNKNOWN] for u in unknown_files[a:b]}
        dataset.update(unk_dict)
        a = b

    train_cfg = ChainMap(dict(bg_noise_files=bg_noise_files), config)
    test_cfg = ChainMap(dict(bg_noise_files=bg_noise_files, noise_prob=0), config)
    datasets = (cls(sets[0], DatasetType.TRAIN, train_cfg),
                cls(sets[1], DatasetType.DEV, test_cfg),
                cls(sets[2], DatasetType.TEST, test_cfg))
    return datasets

fm_terms = tuple(k for k in FM_TERMS)

fm_profile_types = ('acc', 'loc',)

keys_status_flags = tuple(v['id'] for v in viewvalues(OASIS_KEYS_STATUS))

perils = tuple(v['id'] for v in viewvalues(PERILS))

peril_groups = tuple(v['id'] for v in viewvalues(PERIL_GROUPS))

# Used simple echo command rather than ktools conversion utility for testing purposes
ECHO_CONVERSION_INPUT_FILES = {
    k: ChainMap({'conversion_tool': 'echo'}, v) for k, v in INPUT_FILES.items()
}


def standard_input_files(min_size=0):
    return lists(
        sampled_from([
            target['name'] for target in chain(
                viewvalues(GUL_INPUT_FILES), viewvalues(OPTIONAL_INPUT_FILES))
        ]),
        min_size=min_size,
        unique=True,
    )

class ArgumentParser(argparse.ArgumentParser):
    """
    Sets up arguments and overrides the default ArgumentParser error handling.
    """

    JAR_PATH_ARG_NAME = "path"
    PROGRAM_CONF_ARG_NAME = "program_conf"
    CONFIG_OUTPUT_PATH = "out_config"
    FIXED_SPARK_PARAM = "fixed_param"
    PROGRAM_FLAGS = ChainMap(FLAG_TO_DIRECT_PARAM, FLAG_TO_CONF_PARAM)

    @staticmethod
    def make_flag(param):
        return "--" + param

    @staticmethod
    def make_help_msg(desc):
        if isinstance(desc, tuple):
            return str(desc[1]).replace('\r\n', '').rstrip('.') + \
                ". Default: " + str(desc[0]) + "."
        return desc

    def __init__(self, *args, **kwargs):
        super(ArgumentParser, self).__init__(*args, **kwargs)
        # Program information
        self.add_argument(ArgumentParser.make_flag(self.JAR_PATH_ARG_NAME),
                          type=str,
                          required=True,
                          help="Fully qualified JAR path")
        self.add_argument(ArgumentParser.make_flag(self.PROGRAM_CONF_ARG_NAME),
                          type=str,
                          required=False,
                          help="Program-specific parameters")
        self.add_argument(ArgumentParser.make_flag(self.CONFIG_OUTPUT_PATH),
                          type=str,
                          required=False,
                          help="Output config storage location")
        self.add_argument(ArgumentParser.make_flag(self.FIXED_SPARK_PARAM),
                          type=str,
                          required=False,
                          default="",
                          help="List of fixed Spark parameters included as is "
                               "in every run")

        for param in ArgumentParser.PROGRAM_FLAGS:
            required = param in REQUIRED_FLAGS
            param_obj = ArgumentParser.PROGRAM_FLAGS[param]
            param_flag = ArgumentParser.make_flag(param)
            # param_obj.desc will be a tuple if a default value is
            # present (as it is for many params in spark_2_4_params.csv).
            param_desc = ArgumentParser.make_help_msg(param_obj.desc)
            self.add_argument(param_flag,
                              type=param_obj.make_param_from_str,
                              required=required,
                              help=param_desc)

    def error(self, message):
        """Overrides the default error function."""
        print("Error: " + message, file=stderr)
        self.print_usage(stderr)
        raise ArgumentParserError("ArgumentParserError", message)

def _get_defaults(cls):
    # Subclasses will override to change defaults using the ChainMap layering.
    values_map_deque, defaults_map_deque = cls._get_defaults_map_deques()
    return ChainMap(*values_map_deque), ChainMap(*defaults_map_deque)

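# A hypothetical sketch (not from the original source) of the layering that
# _get_defaults relies on: each layer prepends its own map to a deque, so when
# the deque is splatted into ChainMap the most recently added (most derived)
# layer is consulted first. The keys and values below are made up.
from collections import ChainMap, deque

base_values = {'timeout': 30, 'retries': 3}
subclass_values = {'timeout': 60}

values_map_deque = deque([base_values])
values_map_deque.appendleft(subclass_values)  # the subclass layer goes in front

values = ChainMap(*values_map_deque)
assert values['timeout'] == 60   # overridden by the subclass layer
assert values['retries'] == 3    # inherited from the base layer
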
def __setattr__(self, key, value):
    if key == 'maps' or key in self.__dict__:
        ChainMap.__setattr__(self, key, value)
    else:
        self.maps[0][key] = value

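# A minimal, hypothetical sketch of how a ChainMap subclass with the
# __setattr__ above behaves: ordinary attribute assignments are redirected
# into the first (writable) map, while 'maps' itself and existing instance
# attributes still go through the normal ChainMap machinery. The class name
# and values are invented for illustration.
from collections import ChainMap

class AttrChainMap(ChainMap):
    def __setattr__(self, key, value):
        if key == 'maps' or key in self.__dict__:
            ChainMap.__setattr__(self, key, value)
        else:
            self.maps[0][key] = value

cfg = AttrChainMap({}, {'debug': False})
cfg.debug = True                      # stored in the first map, not on the instance
assert cfg['debug'] is True           # shadows the value in the second map
assert cfg.maps[1]['debug'] is False  # the lower layer is untouched
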
def splits(cls, config):
    folder = config["data_folder"]
    wanted_words = config["wanted_words"]
    unknown_prob = config["unknown_prob"]
    train_pct = config["train_pct"]
    dev_pct = config["dev_pct"]
    test_pct = config["test_pct"]

    words = {word: i + 2 for i, word in enumerate(wanted_words)}
    words.update({cls.LABEL_SILENCE: 0, cls.LABEL_UNKNOWN: 1})
    sets = [{}, {}, {}]
    unknowns = [0] * 3
    bg_noise_files = []
    bg_files = []
    unknown_files = []

    class_count = {}
    for folder_name in os.listdir(folder):
        path_name = os.path.join(folder, folder_name)
        if os.path.isdir(path_name):
            number_item = len(os.listdir(path_name))
            # print(path_name, number_item)
            if folder_name in words:
                class_count[str(path_name)] = number_item
    # print('class count', class_count)
    dev_count = int((dev_pct / 100) * np.min([v for v in class_count.values()]))
    # print('dev count', dev_count)

    for folder_name in os.listdir(folder):
        path_name = os.path.join(folder, folder_name)
        is_bg_noise = False
        is_bg = False
        if (folder_name != "_background_noise_" and
                folder_name != "_background_" and folder_name in words):
            dev_data = list(
                np.random.choice(np.arange(len(os.listdir(path_name))),
                                 dev_count, replace=False))
        if os.path.isfile(path_name):
            continue
        if folder_name in words:
            label = words[folder_name]
        elif folder_name == "_background_noise_":
            is_bg_noise = True
        elif folder_name == "_background_":
            is_bg = True
        else:
            label = words[cls.LABEL_UNKNOWN]

        for i, filename in enumerate(os.listdir(path_name)):
            wav_name = os.path.join(path_name, filename)
            if is_bg_noise and os.path.isfile(wav_name):
                bg_noise_files.append(wav_name)
                continue
            elif is_bg and os.path.isfile(wav_name):
                bg_files.append(wav_name)
                continue
            elif label == words[cls.LABEL_UNKNOWN]:
                unknown_files.append(wav_name)
                continue
            if config["group_speakers_by_id"]:
                hashname = re.sub(r"_nohash_.*$", "", filename)
            max_no_wavs = 2**27 - 1
            bucket = int(hashlib.sha1(hashname.encode()).hexdigest(), 16)
            bucket = (bucket % (max_no_wavs + 1)) * (100. / max_no_wavs)
            test_con = True
            if i in dev_data:
                tag = DatasetType.DEV
            else:
                tag = DatasetType.TRAIN
            if test_con and i < 5:
                sets[DatasetType.TEST.value][wav_name] = label
            sets[tag.value][wav_name] = label

    unknowns[0] = len(unknown_files) - dev_count
    unknowns[1] = dev_count
    random.shuffle(unknown_files)
    a = 0
    for i, dataset in enumerate(sets):
        b = a + unknowns[i]
        unk_dict = {u: words[cls.LABEL_UNKNOWN] for u in unknown_files[a:b]}
        dataset.update(unk_dict)
        a = b

    # print(bg_noise_files)
    # print(bg_files)
    train_cfg = ChainMap(
        dict(bg_noise_files=bg_noise_files, bg_files=bg_files), config)
    test_cfg = ChainMap(
        dict(bg_noise_files=bg_noise_files, bg_files=bg_files, noise_prob=0),
        config)
    datasets = (cls(sets[0], DatasetType.TRAIN, train_cfg),
                cls(sets[1], DatasetType.DEV, test_cfg),
                cls(sets[2], DatasetType.TEST, test_cfg))
    return datasets