Example #1
    def merge_params(self, arg_dict, tuner_cfg_dict):
        """
        Extracts all Spark direct and conf parameters from the program
        arguments and the OpenTuner config dict, and merges them with
        their respective Spark default parameters. The function assumes
        that all configurable parameters (i.e. range types) in
        arg_dict are overwritten by specific param values in
        tuner_cfg_dict.

        :param arg_dict: program argument dict that maps a program flag
        to its corresponding SparkParamType
        :param tuner_cfg_dict: OpenTuner config dict, which maps a program
        flag to a corresponding SparkParamType that is guaranteed to be
        a non-range value
        :return: a tuple of two dicts, the first containing all
        Spark direct parameters and the second containing all
        Spark conf parameters. The keys of both are Spark parameter names,
        not program flags.
        """
        input_direct_params = {}
        input_conf_params = {}

        # Extract direct and conf params from the input dicts.
        # Note the order: tuner_cfg_dict takes precedence over arg_dict
        # to ensure that all configurable parameters (i.e. range
        # types) in arg_dict are overwritten by specific param
        # values. TODO Might want to assert:
        # type(param) is SparkParamType and type(param.value) is not tuple
        input_dict = dict(ChainMap({}, tuner_cfg_dict, arg_dict))
        for flag, param in input_dict.items():
            param_val = param.value
            # To ensure that we explicitly specify memory units - lest
            # Spark/YARN misinterpret the input - we use
            # `Util.format_size` here to 'round' all values to
            # kibibytes. For general units, there is a small risk that the
            # rounding here - done outside of the OpenTuner configuration -
            # may throw off any underlying optimization algorithm.
            # TODO figure out when rounding here might cause issues
            if isinstance(param, SparkMemoryType):
                param_val = Util.format_size(param_val, 'k')

            if flag in FLAG_TO_DIRECT_PARAM:
                input_direct_params[param.spark_name] = param_val
            elif flag in FLAG_TO_CONF_PARAM:
                input_conf_params[param.spark_name] = param_val

        # merge input dicts with defaults
        direct_param_default = SparkParamType.get_value_map(
            self.direct_param_default)
        direct_params = ChainMap({}, input_direct_params, direct_param_default)

        conf_defaults = SparkParamType.get_value_map(self.conf_defaults)
        conf_params = ChainMap({}, input_conf_params, conf_defaults)

        return dict(direct_params), dict(conf_params)
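
For reference, the precedence rule that merge_params relies on is plain ChainMap layering: the first map that contains a key wins. A minimal, self-contained sketch with stand-in dicts (the real code stores SparkParamType objects, not bare values):

from collections import ChainMap

arg_dict = {'executor-memory': (1024, 4096), 'driver-cores': 2}  # range placeholder
tuner_cfg_dict = {'executor-memory': 2048}                       # concrete tuned value
defaults = {'executor-memory': 1024, 'driver-cores': 1, 'num-executors': 2}

merged = dict(ChainMap({}, tuner_cfg_dict, arg_dict, defaults))
assert merged == {'executor-memory': 2048, 'driver-cores': 2, 'num-executors': 2}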
Example #2
def get_app_spec(app, context_processors=None):
    if isinstance(app, six.string_types):
        return ChainMap({
            'path': app,
            'context_processors': context_processors
        }, _app_spec_defaults)
    else:
        if 'path' not in app:
            raise ValueError(
                'Each app specified must be a string or a dictionary containing a path'
            )

        app.setdefault('context_processors', []).extend(context_processors
                                                        or [])
        return ChainMap(app, _app_spec_defaults)
def aggregate_and_send_metrics(url, app_name, instance_id, custom_headers,
                               features, ondisk_cache):
    feature_stats_list = []

    for feature_name in features.keys():
        feature_stats = {
            features[feature_name].name: {
                "yes": features[feature_name].yes_count,
                "no": features[feature_name].no_count
            }
        }

        features[feature_name].reset_stats()
        feature_stats_list.append(feature_stats)

    metrics_request = {
        "appName": app_name,
        "instanceId": instance_id,
        "bucket": {
            "start": ondisk_cache[METRIC_LAST_SENT_TIME].isoformat(),
            "stop": datetime.now(timezone.utc).isoformat(),
            "toggles": dict(ChainMap(*feature_stats_list))
        }
    }

    send_metrics(url, metrics_request, custom_headers)
    ondisk_cache[METRIC_LAST_SENT_TIME] = datetime.now(timezone.utc)
    ondisk_cache.sync()
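
The dict(ChainMap(*feature_stats_list)) call above is a compact way to fold a list of single-key dicts into one dict; if a key ever appeared twice, the earliest entry in the list would win. A small illustration with made-up feature names:

from collections import ChainMap

feature_stats_list = [
    {'search': {'yes': 3, 'no': 1}},
    {'beta-ui': {'yes': 0, 'no': 7}},
]
toggles = dict(ChainMap(*feature_stats_list))
assert toggles == {'search': {'yes': 3, 'no': 1}, 'beta-ui': {'yes': 0, 'no': 7}}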
Example #4
def print_sessions(results, country):

    if country == 'United States,ga:country==Canada':
        country = 'US'
    elif country == 'United Kingdom':
        country = 'UK'

    results = results.get('rows')

    def new_result(result):
        return {"option": result[1], result[0]: result[2]}

    results = itertools.groupby(sorted(list(map(new_result, results)), key=itemgetter('option')), key=lambda x: x['option'])
    result = []
    for key, item in results:
        result.append(dict(ChainMap(*list(item)+[{'Country': country}])))

    key_lst = ['Referral', 'Direct', 'Social', 'Organic Search', 'Paid Search', 'Country', 'option']

    def merge_email(a):
        keys = list(a.keys())
        Email = 0
        for key in keys:
            if key not in key_lst:
                Email += int(a[key])
                del a[key]
        a['Email'] = str(Email)
        return a

    result = list(map(merge_email, result))
    return result
Example #5
    def args_from_node(cls, node, overrides=None, defaults=None):
        if overrides is None:
            overrides = {}
        if defaults is None:
            defaults = {}

        params = ChainMap(overrides, node, defaults)
        return make_dict_from_map(params, cls.get_arg_key_map())
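
This idiom (repeated in Examples #11 and #13 below) layers per-call overrides over the node's own values over hard-coded defaults. A standalone sketch with hypothetical keys:

from collections import ChainMap

node = {'ip': '10.0.0.5', 'user': 'deploy'}
defaults = {'ssh_port': 22, 'user': 'root'}
overrides = {'user': 'admin'}

params = ChainMap(overrides, node, defaults)
assert params['user'] == 'admin'    # override wins
assert params['ip'] == '10.0.0.5'   # falls through to the node
assert params['ssh_port'] == 22     # falls through to the defaults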
Example #6
def print_Trafiic(results):
    results = results.get('rows')
    def new_result(result):
        return {"option": result[0], 'traffic': result[1]}
    results = itertools.groupby(sorted(list(map(new_result, results)), key=itemgetter('option')), key=lambda x: x['option'])
    result = []
    for key, item in results:
        result.append(dict(ChainMap(*list(item))))
    return result
Example #7
def print_sourceTraffic(results):
    results = results.get('rows')
    def new_result(result):
        return {"option": result[1], 'Email' if result[0] == '(Other)' else result[0]: result[2]}
    results = itertools.groupby(sorted(list(map(new_result, results)), key=itemgetter('option')), key=lambda x: x['option'])
    result = []
    for key, item in results:
        result.append(dict(ChainMap(*item)))
    return result
Example #8
def print_conversions(results):
    def new_result(result):
        return {"option": result[1], 'Email' if result[0] == '(Other)' else result[0]: result[2]}

    results = itertools.groupby(sorted(list(map(new_result, results)), key=itemgetter('option')),
                                key=lambda x: x['option'])
    result = []
    for key, item in results:
        result.append(dict(ChainMap(*list(item))))
    return result
Example #9
class GeppettoResource(JsonResource):
    packages = [
        eClassifiers, datasources.eClassifiers, types.eClassifiers,
        values.eClassifiers, variables.eClassifiers
    ]
    chain = ChainMap(*packages)

    def serialize_eclass(self, eclass):
        return eclass.name

    @lru_cache()
    def resolve_eclass(self, uri):
        return self.chain.get(uri)
Example #10
    def _get_section_env_vars(self, section):
        section_env_prefix = 'OASIS_API_{}_'.format(section.upper())
        global_env_prefix = 'OASIS_API_'

        return ChainMap(
            {
                k.replace(section_env_prefix, ''): v
                for k, v in os.environ.items()
                if k.startswith(section_env_prefix)
            },
            {
                k.replace(global_env_prefix, ''): v
                for k, v in os.environ.items()
                if k.startswith(global_env_prefix)
            },
        )
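
Here the ChainMap gives section-scoped environment variables precedence over global ones without copying os.environ. A quick check with assumed variable names:

import os
from collections import ChainMap

os.environ['OASIS_API_SERVER_PORT'] = '8001'  # section-scoped value
os.environ['OASIS_API_PORT'] = '8000'         # global fallback

section_env_prefix = 'OASIS_API_SERVER_'
global_env_prefix = 'OASIS_API_'
env = ChainMap(
    {k.replace(section_env_prefix, ''): v
     for k, v in os.environ.items() if k.startswith(section_env_prefix)},
    {k.replace(global_env_prefix, ''): v
     for k, v in os.environ.items() if k.startswith(global_env_prefix)},
)
assert env['PORT'] == '8001'  # the section value shadows the global one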
Example #11
 def args_from_node(cls, node, overrides=None, defaults=None):
     if overrides is None:
         overrides = {}
     if defaults is None:
         defaults = {}
     params = ChainMap(overrides, node, defaults)
     return {
         'user': params['user'],
         'host': params['ip'],
         'port': params.get('ssh_port', cls.SSH_PORT),
         'pkey': params.get('pkey'),
         'key_filename': params.get('key_filename'),
         'password': params.get('password'),
         'name': params.get('name'),
     }
Example #12
def getArguments():
    # Program Internal settings
    # I know that it is slower to load this way but it is more explicit and readable in my opinion
    program_defaults = {}
    program_defaults['debug'] = 'False'
    program_defaults['group_id'] = 'com.dell.cpsd'
    program_defaults['maven_dependency_plugin_version'] = '3.0.2'
    program_defaults['dependency_tree_output_file'] = 'dependency_tree'

    # Property File settings
    property_file_name = os.path.splitext(
        os.path.basename(__file__))[0] + '.props'
    property_file_path = os.path.realpath(
        os.path.join(
            os.getcwd(),
            os.path.dirname(property_file_name))) + os.sep + property_file_name
    property_file_properties = {}
    # If no property file exists, don't sweat it, just keep going.
    try:
        config = ConfigParser()
        with open(property_file_path) as stream:
            stream = StringIO("[root]\n" + stream.read())
        config.read_file(stream)
        property_file_properties = dict(config.items('root'))
    except IOError:
        pass

    # Command Line settings
    parser = argparse.ArgumentParser()
    parser.add_argument('-db', '--debug', help='Ibid. Defaults to False')
    parser.add_argument('-gid',
                        '--group_id',
                        help='Ibid. Defaults to com.dell.cpsd')
    parser.add_argument('-mpv',
                        '--maven_dependency_plugin_version',
                        help='Ibid. Defaults to 3.0.2')
    parser.add_argument('-dtof',
                        '--dependency_tree_output_file',
                        help='Ibid. Defaults to dependency_tree')
    namespace = parser.parse_args()
    # Create a dictionary of the given parser command line inputs
    command_line_args = {k: v for k, v in vars(namespace).items() if v}

    # Now create a chainmap of all the dictionaries in the order of precedence.
    return ChainMap(command_line_args, os.environ, property_file_properties,
                    program_defaults)
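
The returned ChainMap resolves every setting lazily at lookup time, checking the command line first, then the environment, then the property file, then the built-in defaults. A sketch of that behavior with stand-in dicts (the real code passes os.environ directly):

from collections import ChainMap

command_line_args = {}
environ = {'group_id': 'com.example.override'}
property_file_properties = {'debug': 'True'}
program_defaults = {'debug': 'False', 'group_id': 'com.dell.cpsd'}

settings = ChainMap(command_line_args, environ, property_file_properties,
                    program_defaults)
assert settings['group_id'] == 'com.example.override'  # environment beats defaults
assert settings['debug'] == 'True'                     # property file beats defaults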
Example #13
 def from_node(cls, node, overrides=None, defaults=None):
     if overrides is None:
         overrides = {}
     if defaults is None:
         defaults = {}
     params = ChainMap(overrides, node, defaults)
     return cls(
         user=params['user'],
         host=params['ip'],
         # paramiko doesn't like None default, requires SSH_PORT default
         port=params.get('ssh_port', SSH_PORT),
         pkey=params.get('pkey'),
         key_filename=params.get('key_filename'),
         password=params.get('password'),
         name=params.get('name'))
Example #14
    def splits(cls, config):
        folder = config["data_folder"]
        wanted_words = config["wanted_words"]
        unknown_prob = config["unknown_prob"]
        train_pct = config["train_pct"]
        dev_pct = config["dev_pct"]
        test_pct = config["test_pct"]
        snr = config["snr"]

        words = {word: i + 2 for i, word in enumerate(wanted_words)}
        words.update({cls.LABEL_SILENCE: 0, cls.LABEL_UNKNOWN: 1})
        sets = [{}, {}, {}]
        unknowns = [0] * 3
        bg_noise_files = []
        unknown_files = []
        wav_path = []

        print("snr:{}".format(snr))
        path_listname = ''.join(
            ['/home/guyue/CNNProgram/datalist/snr', str(snr), '.lst'])
        with open(path_listname) as f:
            for line in f.readlines():
                data = line.split(' ')
                wav_name = data[0][:]
                wordname = data[1][0:-1]
                if wordname in words:
                    label = words[wordname]
                else:
                    label = words[cls.LABEL_UNKNOWN]
                if label == words[cls.LABEL_UNKNOWN]:
                    unknown_files.append(wav_name)
                    continue
                if config["group_speakers_by_id"]:
                    hashname = re.sub(r"_nohash_.*$", "", wav_name)
                    hashname = hashname.split('/')
                    hashname = hashname[-1][:]
                max_no_wavs = 2**27 - 1
                bucket = int(hashlib.sha1(hashname.encode()).hexdigest(), 16)
                bucket = (bucket % (max_no_wavs + 1)) * (100. / max_no_wavs)
                if bucket < dev_pct:
                    tag = DatasetType.DEV
                elif bucket < test_pct + dev_pct:
                    tag = DatasetType.TEST
                else:
                    tag = DatasetType.TRAIN
                sets[tag.value][wav_name] = label

        for tag in range(len(sets)):
            unknowns[tag] = int(unknown_prob * len(sets[tag]))
        random.shuffle(unknown_files)
        a = 0
        for i, dataset in enumerate(sets):
            b = a + unknowns[i]
            unk_dict = {
                u: words[cls.LABEL_UNKNOWN]
                for u in unknown_files[a:b]
            }
            dataset.update(unk_dict)
            a = b

        train_cfg = ChainMap(dict(bg_noise_files=bg_noise_files), config)
        test_cfg = ChainMap(dict(bg_noise_files=bg_noise_files, noise_prob=0),
                            config)
        datasets = (cls(sets[0], DatasetType.TRAIN,
                        train_cfg), cls(sets[1], DatasetType.DEV, test_cfg),
                    cls(sets[2], DatasetType.TEST, test_cfg))
        return datasets
Example #15
    def splits(cls, config):
        folder = config["data_folder"]  # data/speech_dataset
        wanted_words = config[
            "wanted_words"]  # ['yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go']
        unknown_prob = config["unknown_prob"]  # 0.1
        train_pct = config["train_pct"]  # 80
        dev_pct = config["dev_pct"]  # 10
        test_pct = config["test_pct"]  # 10

        words = {word: i + 2 for i, word in enumerate(wanted_words)}
        # {'yes': 2, 'no': 3, 'up': 4, 'down': 5, 'left': 6, 'right': 7, 'on': 8, 'off': 9, 'stop': 10, 'go': 11}
        words.update({cls.LABEL_SILENCE: 0, cls.LABEL_UNKNOWN: 1})
        sets = [{}, {}, {}]
        unknowns = [0] * 3
        bg_noise_files = []
        unknown_files = []

        for folder_name in os.listdir(folder):
            path_name = os.path.join(folder,
                                     folder_name)  # data/speech_dataset/yes
            is_bg_noise = False
            if os.path.isfile(path_name):
                continue
            if folder_name in words:
                label = words[folder_name]
            elif folder_name == "_background_noise_":
                is_bg_noise = True
            else:
                label = words[cls.LABEL_UNKNOWN]

            for filename in os.listdir(path_name):
                wav_name = os.path.join(
                    path_name,
                    filename)  # data/speech_dataset/down/00b01445_nohash_1.wav
                if is_bg_noise and os.path.isfile(wav_name):
                    bg_noise_files.append(wav_name)
                    continue
                elif label == words[cls.LABEL_UNKNOWN]:  # folders outside wanted_words (e.g. one/four) are unknown
                    unknown_files.append(wav_name)
                    continue
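                # NOTE: hashname below is only bound when
                # config["group_speakers_by_id"] is set; with it disabled,
                # the sha1 bucketing would raise a NameError (a quirk of
                # the original code).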
                if config["group_speakers_by_id"]:
                    hashname = re.sub(r"_nohash_.*$", "", filename)
                max_no_wavs = 2**27 - 1
                bucket = int(hashlib.sha1(hashname.encode()).hexdigest(), 16)
                # hexdigest() returns the hash as a base-16 (hex) string
                bucket = (bucket % (max_no_wavs + 1)) * (100. / max_no_wavs)
                if bucket < dev_pct:
                    tag = DatasetType.DEV  # TRAIN = 0, DEV = 1, TEST = 2
                elif bucket < test_pct + dev_pct:  # dev_pct = 10, test_pct = 10, train_pct = 80
                    tag = DatasetType.TEST
                else:
                    tag = DatasetType.TRAIN
                if config["type"] == "eval":
                    sets[2][wav_name] = label
                elif config["type"] == "train":
                    sets[tag.value][wav_name] = label
                #  sets = [
                # train  {'00b01445_nohash_1': 1, },  length = 16696
                # dev    {'00b01443_nohash_1': 2, },  length = 2316
                # test   {'00b01441_nohash_1': 3, }   length = 2311
                #  ]

        for tag in range(len(sets)):
            unknowns[tag] = int(
                unknown_prob *
                len(sets[tag]))  # train length, validation, test
        random.shuffle(unknown_files)
        a = 0
        for i, dataset in enumerate(sets):
            b = a + unknowns[i]
            unk_dict = {
                u: words[cls.LABEL_UNKNOWN]
                for u in unknown_files[a:b]
            }
            dataset.update(unk_dict)
            a = b
            # unknown_files is consumed in consecutive slices: the first
            # unknowns[0] files go to train, the next unknowns[1] to dev,
            # and the next unknowns[2] to test.
        train_cfg = ChainMap(dict(bg_noise_files=bg_noise_files), config)
        test_cfg = ChainMap(dict(bg_noise_files=bg_noise_files, noise_prob=0),
                            config)
        # print(test_cfg)
        datasets = (cls(sets[0], DatasetType.TRAIN,
                        train_cfg), cls(sets[1], DatasetType.DEV, test_cfg),
                    cls(sets[2], DatasetType.TEST, config))
        return datasets
Example #16
from chainmap import ChainMap

x = ChainMap()

try:
    x[5]
except KeyError:
    pass

a = {"a": 4}
b = {"b": 5}
x = ChainMap((a, b))
assert x["a"] == 4
assert x["b"] == 5
a["b"] = 6
b["c"] = 7
assert x["b"] == 6
assert x["c"] == 7

x = ChainMap()
assert x.maps == [{}]

x.maps = [{"a": 4}]
assert x["a"] == 4

x = ChainMap((1, ))
try:
    x[5]
except TypeError as e:
    assert e.args[0] == "'int' object is not subscriptable"
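
These assertions exercise the third-party chainmap backport, whose constructor (as the checks above suggest) appears to accept a single iterable of maps. The standard library's collections.ChainMap takes the maps as separate positional arguments instead; a minimal stdlib equivalent of the core checks:

from collections import ChainMap

a = {"a": 4}
b = {"b": 5}
x = ChainMap(a, b)  # stdlib API: maps passed separately, not as one tuple
assert x["a"] == 4
assert x["b"] == 5
a["b"] = 6          # the first map shadows later ones
b["c"] = 7          # new keys in any map become visible
assert x["b"] == 6
assert x["c"] == 7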
Example #17
    def transform_templates(self):
        empty = dict()
        try:
            global_context = ChainMap(self.data)
            for page in self.get_all_pages():

                data_key, ext = splitext(page.file_path)

                # The context is the complete set of variables the template
                # will be able to reference.
                #
                # There are automatic globals like 'root' and 'page'.
                #
                # There are variables scoped to the file, matched by name. So
                # if there is a file 'foo.yml' containing 'title=bar' then within
                # the template 'foo.html', the variable 'title' will be defined and
                # set to the string 'bar'.

                file_variables = self.data.get(data_key) or empty

                # print("XXX Data for %s:" % data_key)
                # pprint.pprint(file_variables, sys.stdout)

                generator = file_variables.get('generator')
                if generator:
                    #print("XXX GENERATOR %s:" % data_key); pprint.pprint(generator, sys.stdout)

                    data_file = generator.get('data_file')
                    iteration_list_key = generator.get('iteration_list_key')
                    iteration_item_key = generator.get('iteration_item_key', 'item')
                    output_filename = generator.get('output_filename')

                    if data_file:
                        generator_data = self.data.get(data_file)
                        if not generator_data:
                            raise SetupError('%s generator data_file "%s" not found. Keys: %s' % (page.file_path, data_file, self.data.keys()))

                    else:
                        generator_data = file_variables

                    if iteration_list_key:
                        iteration_list = generator_data.get(iteration_list_key)
                        if not iteration_list:
                            raise SetupError('%s generator could not find key "%s" in generator data' % (page.file_path, iteration_list_key))

                    #print("XXX ROOT %s:" % data_key); pprint.pprint(iteration_list, sys.stdout)

                    if not output_filename:
                        raise SetupError('%s generator did not include output_filename' % (page.file_path,))

                    page_name_template = self.setup.jinja.from_string(output_filename)

                    for iteration_item in iteration_list:
                        #print("XXX ITERATION ITEM"); pprint.pprint(iteration_item, sys.stdout)

                        # automatic_variables = dict(
                        #     page=page.file_path,
                        #     root=page.relative_root_path,
                        # )

                        context = global_context.new_child({
                            iteration_item_key: iteration_item,
                            iteration_list_key: iteration_list,
                            **file_variables,
                        })#.new_child(file_variables)

                        page_name = page_name_template.render(context)
                        #print("XXX page_name_template=[%s] -> page_name=[%s]" % (output_filename, page_name))

                        page.write(
                            out_path=join(self.setup.dist_dir, page_name),
                            context=context  # global_context.new_child(file_variables),
                        )

                else:  # no generator

                    page.write(
                        out_path=join(self.setup.dist_dir, page.output_file_path),
                        context=global_context.new_child(file_variables),
                    )

                    # # Ensure the "pages" part of the path is trimed, so:
                    # #   "pages/index.html" -> ".../dist/index.html"
                    # #   "pages/about/foo.html" -> ".../dist/about/foo.html"
                    # out_path = join(self.config.dist_dir, page.output_file_path)


        except jinja2.exceptions.TemplateSyntaxError as tse:
            self.setup.log.error("%s:%s: %s %s" % (tse.filename, tse.lineno, tse.name, tse.message))
            sys.exit(1)
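
global_context.new_child({...}) above creates a per-page scope without mutating the shared site data: page variables shadow globals, and anything missing falls back to the global map. A reduced sketch:

from collections import ChainMap

global_context = ChainMap({'site_name': 'Example Site'})
page_context = global_context.new_child({'title': 'About'})

assert page_context['title'] == 'About'              # page-local variable
assert page_context['site_name'] == 'Example Site'   # falls back to the globals
assert 'title' not in global_context                 # the parent map is untouched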
Example #18
# NOTE: the opening of this expression was truncated in the excerpt; the
# assignment and outer call below are an assumed reconstruction.
fm_levels = tuple(t for t in sorted([t for t in set(
    (t.get('FMLevelName'), t.get('FMLevel'))
    for t in itertools.chain(six.itervalues(canonical_exposures_profile_simple), six.itervalues(canonical_accounts_profile))
) if t != (None, None)], key=lambda t: t[1])
)

fm_term_types = tuple(FM_TERMS[k]['desc'] for k in FM_TERMS)

fm_profile_types = ('acc', 'loc',)

keys_status_flags = tuple(OASIS_KEYS_STATUS[k]['id'] for k in OASIS_KEYS_STATUS)

peril_ids = tuple(OASIS_PERILS[k]['id'] for k in OASIS_PERILS)
oed_peril_ids = tuple(OED_PERILS[k]['id'] for k in OED_PERILS)

# Used simple echo command rather than ktools conversion utility for testing purposes
ECHO_CONVERSION_INPUT_FILES = {k: ChainMap({'conversion_tool': 'echo'}, v) for k, v in INPUT_FILES.items()}

def standard_input_files(min_size=0):
    return lists(
        sampled_from([target['name'] for target in chain(six.itervalues(GUL_INPUT_FILES), six.itervalues(OPTIONAL_INPUT_FILES))]),
        min_size=min_size,
        unique=True,
    )


def il_input_files(min_size=0):
    return lists(
        sampled_from([target['name'] for target in six.itervalues(IL_INPUT_FILES)]),
        min_size=min_size,
        unique=True,
    )
def getArguments():
    # Program Internal settings
    # I know that it is slower to load this way but it is more explicit and readable in my opinion
    program_defaults = {}
    program_defaults['github_url'] = 'https://github.com'
    program_defaults['github_organization'] = 'dellemc-symphony'
    program_defaults['giteos2_url'] = 'https://eos2git.cec.lab.emc.com'
    program_defaults['giteos2_organization'] = 'VCE-Symphony'
    program_defaults[
        'giteos2_certs'] = '/opt/security/EMC_CA_GIT_HUB_Combo.pem'
    program_defaults['root_parent_version'] = '1.1.0'
    program_defaults['git_branch'] = 'master'

    # Property File settings
    property_file_name = os.path.splitext(
        os.path.basename(__file__))[0] + '.props'
    property_file_path = os.path.realpath(
        os.path.join(
            os.getcwd(),
            os.path.dirname(property_file_name))) + os.sep + property_file_name
    property_file_properties = {}
    # If no property file exists, don't sweat it, just keep going.
    try:
        config = ConfigParser()
        with open(property_file_path) as stream:
            stream = StringIO("[root]\n" + stream.read())
        config.read_file(stream)
        property_file_properties = dict(config.items('root'))
    except IOError:
        pass

    # Command Line settings
    parser = argparse.ArgumentParser()
    parser.add_argument('-gu',
                        '--github_username',
                        help='User name associated with Github account.')
    parser.add_argument('-gp',
                        '--github_password',
                        help='Password associated with Github account')
    parser.add_argument(
        '-gt',
        '--github_authtoken',
        help='Authentication token associated with Github account.')
    parser.add_argument('-go',
                        '--github_organization',
                        help='Github source organization. Default: ' +
                        program_defaults['github_organization'])
    parser.add_argument('-eos2url',
                        '--giteos2_url',
                        help='eos2 git URL. Default: ' +
                        program_defaults['giteos2_url'])
    parser.add_argument('-eos2u',
                        '--giteos2_username',
                        help='User name associated with eos2 account.')
    parser.add_argument('-eos2p',
                        '--giteos2_password',
                        help='Password associated with eos2 account')
    parser.add_argument(
        '-eos2t',
        '--giteos2_authtoken',
        help='Authentication token associated with eos2 account.')
    parser.add_argument('-eos2o',
                        '--giteos2_organization',
                        help='eos2 source organization. Default: ' +
                        program_defaults['giteos2_organization'])
    parser.add_argument(
        '-rpv',
        '--root_parent_version',
        help=
        'The root-parent version used in the generated maven parent pom.xml.')
    parser.add_argument(
        '-gb',
        '--git_branch',
        help='The git branch that should be checkout in each repository.')
    namespace = parser.parse_args()
    # Create a dictionary of the given parser command line inputs
    command_line_args = {k: v for k, v in vars(namespace).items() if v}

    # Now create a chainmap of all the dictionaries in the order of precedence.
    return ChainMap(command_line_args, os.environ, property_file_properties,
                    program_defaults)
Example #20
def lambda_handler(event, context, debug=False):
    missing = [key for key in ENV_KEYS if key not in os.environ]

    if missing:
        print('Missing required environment keys:', ', '.join(missing))
        return

    if debug:
        import sys
        sys.path.insert(
            0, os.path.join(os.path.dirname(__file__), 'dependencies'))
        print(os.path.join(os.path.dirname(__file__), 'dependencies'))

    from github3 import login

    if 'Records' in event:
        # SNS
        if VERBOSE:
            event_type = event['Records'][0]['Sns']['MessageAttributes'][
                'X-Github-Event']['Value']
            print(event_type + ': ' + event['Records'][0]['Sns']['Message'])
        message = json.loads(event['Records'][0]['Sns']['Message'])
    else:
        # API
        message = event
        if VERBOSE:
            print('API: ' + json.dumps(event, indent=2))

    if 'pull_request' not in message:
        print('Not a PR event. Aborting')
        return

    action = message.get('action')
    pr_id = message.get('number')

    if action not in ('opened', 'synchronize'):
        print('Not handling {} action for Pull Request {}'.format(
            action, pr_id))
        return

    author = message['pull_request']['user']['login']

    base_repo_owner = message['pull_request']['base']['repo']['owner']['login']
    base_repo = message['pull_request']['base']['repo']['name']
    base_repo_full_name = message['pull_request']['base']['repo']['full_name']

    head_repo_owner = message['pull_request']['head']['repo']['owner']['login']
    head_repo = message['pull_request']['head']['repo']['name']
    head_sha = message['pull_request']['head']['sha']

    base_branch = message['pull_request']['base']['ref']
    head_branch = message['pull_request']['head']['ref']

    if base_repo_full_name.lower() not in config.repos:
        print("Got event for unexpected repo {}".format(base_repo_full_name))
        return

    repo_config = ChainMap(config.repos[base_repo_full_name.lower()],
                           config.default, EMPTY_REPO_CONFIG)

    if base_branch in repo_config['ignore_base_branch']:
        print('PR {} is targeting {} branch, aborting'.format(
            pr_id, base_branch))
        return

    if author in repo_config['ignore_login']:
        print('Ignoring pull request {} from {}'.format(pr_id, author))
        return

    gh = login(os.environ['GH_USER'], password=os.environ['GH_TOKEN'])

    issue = gh.issue(base_repo_owner, base_repo, pr_id)
    pr = gh.pull_request(base_repo_owner, base_repo, pr_id)
    head_repo = gh.repository(head_repo_owner, head_repo)
    head_commit = head_repo.commit(head_sha)

    files_changed = pr.files()
    current_labels = set(str(l) for l in issue.original_labels)

    # Calculate which labels to add and remove
    # Team Labels
    label_tests = {
        label: (author in users)
        for label, users in repo_config['team_labels'].items()
    }

    # File Pattern Labels
    for label, patterns in repo_config['file_pattern_labels'].items():
        label_tests[label] = False

        if isinstance(patterns, str):
            patterns = [patterns]

        for pattern in patterns:
            if isinstance(pattern, str):
                match = any(
                    fnmatch(pfile.filename, pattern)
                    for pfile in files_changed)
            else:
                match = any(
                    pattern.match(pfile.filename) is not None
                    for pfile in files_changed)

            if match:
                label_tests[label] = True
                break

        if label_tests[label]:
            continue

    # Base Branch Labels
    label_tests.update({
        label: fnmatch(base_branch, pattern) or label_tests.get(label, False)
        for label, pattern in repo_config['base_branch_labels'].items()
    })
    # Head Branch Labels
    label_tests.update({
        label: fnmatch(head_branch, pattern) or label_tests.get(label, False)
        for label, pattern in repo_config['head_branch_labels'].items()
    })

    # Find labels to remove:
    remove_labels = current_labels & set(
        label for label, to_add in label_tests.items() if not to_add)

    # Labels to add:
    add_labels = (set(lab for lab, to_add in label_tests.items() if to_add) -
                  current_labels)

    # new set of labels:
    new_labels = (current_labels - remove_labels) | add_labels

    if new_labels != current_labels:
        print('Changing labels on PR#{0}.'.format(pr.number))
        if add_labels:
            print('Adding {0}'.format(', '.join(add_labels)))
        if remove_labels:
            print('Removing {0}'.format(','.join(remove_labels)))

        if not debug:
            if add_labels:
                issue.add_labels(*add_labels)
            for label in remove_labels:
                issue.remove_label(label)

    if repo_config['commit_status']:
        repo = gh.repository(base_repo_owner, base_repo)
        current_statuses = set(status.context
                               for status in head_commit.statuses())

        for context, description in repo_config['commit_status'].items():
            if context in current_statuses:
                print('Skipping setting commit status {}, already set.'.format(
                    context))
            elif debug:
                print('Setting {} status {} to {}: {}'.format(
                    head_commit.sha, context, 'pending', description))
            else:
                repo.create_status(head_commit.sha,
                                   'pending',
                                   context=context,
                                   description=description)

    print('Handled pull request {}'.format(pr_id))
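
repo_config above layers per-repository settings over shared defaults over an empty schema map, so lookups for unset keys degrade gracefully instead of raising KeyError. A toy version with hypothetical repo names:

from collections import ChainMap

EMPTY_REPO_CONFIG = {'ignore_login': [], 'ignore_base_branch': [],
                     'team_labels': {}, 'commit_status': {}}
default = {'ignore_login': ['dependabot']}
repos = {'org/widget': {'commit_status': {'ci/lint': 'Lint pending'}}}

repo_config = ChainMap(repos['org/widget'], default, EMPTY_REPO_CONFIG)
assert repo_config['commit_status'] == {'ci/lint': 'Lint pending'}  # repo-specific
assert repo_config['ignore_login'] == ['dependabot']                # shared default
assert repo_config['ignore_base_branch'] == []                      # schema fallback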
Example #21
 def __init__(self, *maps):
     ChainMap.__init__(self, *maps)
Example #22
    def splits(cls, config):
        folder = config["data_folder"]
        wanted_words = config["wanted_words"]
        unknown_prob = config["unknown_prob"]
        train_pct = config["train_pct"]
        dev_pct = config["dev_pct"]
        test_pct = config["test_pct"]

        words = {word: i + 2 for i, word in enumerate(wanted_words)}
        words.update({cls.LABEL_SILENCE: 0, cls.LABEL_UNKNOWN: 1})
        sets = [{}, {}, {}]
        unknowns = [0] * 3
        bg_noise_files = []
        unknown_files = []

        for folder_name in os.listdir(folder):
            path_name = os.path.join(folder, folder_name)
            is_bg_noise = False
            if os.path.isfile(path_name):
                continue
            if folder_name in words:
                label = words[folder_name]
            elif folder_name == "_background_noise_":
                is_bg_noise = True
            else:
                label = words[cls.LABEL_UNKNOWN]

            for filename in os.listdir(path_name):
                wav_name = os.path.join(path_name, filename)
                if is_bg_noise and os.path.isfile(wav_name):
                    bg_noise_files.append(wav_name)
                    continue
                elif label == words[cls.LABEL_UNKNOWN]:
                    unknown_files.append(wav_name)
                    continue
                if config["group_speakers_by_id"]:
                    hashname = re.sub(r"_nohash_.*$", "", filename)
                max_no_wavs = 2**27 - 1
                bucket = int(hashlib.sha1(hashname.encode()).hexdigest(), 16)
                bucket = (bucket % (max_no_wavs + 1)) * (100. / max_no_wavs)
                if bucket < dev_pct:
                    tag = DatasetType.DEV
                elif bucket < test_pct + dev_pct:
                    tag = DatasetType.TEST
                else:
                    tag = DatasetType.TRAIN
                sets[tag.value][wav_name] = label

        for tag in range(len(sets)):
            unknowns[tag] = int(unknown_prob * len(sets[tag]))
        random.shuffle(unknown_files)
        a = 0
        for i, dataset in enumerate(sets):
            b = a + unknowns[i]
            unk_dict = {u: words[cls.LABEL_UNKNOWN] for u in unknown_files[a:b]}
            dataset.update(unk_dict)
            a = b

        train_cfg = ChainMap(dict(bg_noise_files=bg_noise_files), config)
        test_cfg = ChainMap(dict(bg_noise_files=bg_noise_files, noise_prob=0), config)
        datasets = (cls(sets[0], DatasetType.TRAIN, train_cfg), cls(sets[1], DatasetType.DEV, test_cfg),
                cls(sets[2], DatasetType.TEST, test_cfg))
        return datasets
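
Note that train_cfg and test_cfg are views over the shared config rather than copies: the small front dict shadows noise_prob for evaluation, while later changes to config stay visible through both. For instance (input_length is a made-up key):

from collections import ChainMap

config = {'noise_prob': 0.8, 'input_length': 16000}
test_cfg = ChainMap(dict(noise_prob=0), config)

assert test_cfg['noise_prob'] == 0         # shadowed for the test split
assert test_cfg['input_length'] == 16000   # read through to config
config['input_length'] = 8000              # updates remain visible
assert test_cfg['input_length'] == 8000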
Example #23
fm_terms = tuple(k for k in FM_TERMS)

fm_profile_types = (
    'acc',
    'loc',
)

keys_status_flags = tuple(v['id'] for v in viewvalues(OASIS_KEYS_STATUS))

perils = tuple(v['id'] for v in viewvalues(PERILS))

peril_groups = tuple(v['id'] for v in viewvalues(PERIL_GROUPS))

# Used simple echo command rather than ktools conversion utility for testing purposes
ECHO_CONVERSION_INPUT_FILES = {
    k: ChainMap({'conversion_tool': 'echo'}, v)
    for k, v in INPUT_FILES.items()
}


def standard_input_files(min_size=0):
    return lists(
        sampled_from([
            target['name'] for target in chain(
                viewvalues(GUL_INPUT_FILES), viewvalues(OPTIONAL_INPUT_FILES))
        ]),
        min_size=min_size,
        unique=True,
    )

Example #24
class ArgumentParser(argparse.ArgumentParser):
    """
    Sets up arguments and overrides default ArgumentParser error
    """
    JAR_PATH_ARG_NAME = "path"
    PROGRAM_CONF_ARG_NAME = "program_conf"
    CONFIG_OUTPUT_PATH = "out_config"
    FIXED_SPARK_PARAM = "fixed_param"
    PROGRAM_FLAGS = ChainMap(FLAG_TO_DIRECT_PARAM, FLAG_TO_CONF_PARAM)

    @staticmethod
    def make_flag(param):
        return "--" + param

    @staticmethod
    def make_help_msg(desc):
        if isinstance(desc, tuple):
            return str(desc[1]).replace('\r\n', '').rstrip('.') + \
                   ". Default: " + str(desc[0]) + "."
        return desc

    def __init__(self, *args, **kwargs):
        super(ArgumentParser, self).__init__(*args, **kwargs)

        # Program information
        self.add_argument(ArgumentParser.make_flag(self.JAR_PATH_ARG_NAME),
                          type=str,
                          required=True,
                          help="Fully qualified JAR path")
        self.add_argument(ArgumentParser.make_flag(self.PROGRAM_CONF_ARG_NAME),
                          type=str,
                          required=False,
                          help="Program-specific parameters")
        self.add_argument(ArgumentParser.make_flag(self.CONFIG_OUTPUT_PATH),
                          type=str,
                          required=False,
                          help="Output config storage location")
        self.add_argument(ArgumentParser.make_flag(self.FIXED_SPARK_PARAM),
                          type=str,
                          required=False,
                          default="",
                          help="List of fixed Spark parameters included as is"
                          "in every run")

        for param in ArgumentParser.PROGRAM_FLAGS:
            required = param in REQUIRED_FLAGS
            param_obj = ArgumentParser.PROGRAM_FLAGS[param]
            param_flag = ArgumentParser.make_flag(param)
            # param_obj.desc will be a tuple if a default value is
            # present (as it is for many params in spark_2_4_params.csv).
            param_desc = ArgumentParser.make_help_msg(param_obj.desc)
            self.add_argument(param_flag,
                              type=param_obj.make_param_from_str,
                              required=required,
                              help=param_desc)

    def error(self, message):
        """Overwrites default error function"""
        print("Error: " + message, file=stderr)
        self.print_usage(stderr)
        raise ArgumentParserError("ArgumentParserError", message)
Example #25
 def _get_defaults(cls):
     # subclasses will override to change defaults using the ChainMap
     # layering
     values_map_deque, defaults_map_deque = cls._get_defaults_map_deques()
     return ChainMap(*values_map_deque), ChainMap(*defaults_map_deque)
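
The deque layering lets subclasses push their own maps in front of (or behind) the inherited ones before the two ChainMaps are built. A hedged sketch of the idea; the real _get_defaults_map_deques is not shown in this excerpt:

from collections import ChainMap, deque

base_defaults = deque([{'timeout': 30, 'retries': 3}])
subclass_defaults = deque(base_defaults)
subclass_defaults.appendleft({'timeout': 60})  # the subclass layer wins

defaults = ChainMap(*subclass_defaults)
assert defaults['timeout'] == 60   # overridden by the subclass
assert defaults['retries'] == 3    # inherited from the base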
Example #26
 def __setattr__(self, key, value):
     if key == 'maps' or key in self.__dict__:
         ChainMap.__setattr__(self, key, value)
     else:
         self.maps[0][key] = value
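
With this override, plain attribute assignment writes into the first map, while maps itself (and any real instance attributes) still go through normal object semantics. A usage sketch for such a subclass, here given the hypothetical name AttrChainMap:

from collections import ChainMap

class AttrChainMap(ChainMap):
    def __setattr__(self, key, value):
        if key == 'maps' or key in self.__dict__:
            ChainMap.__setattr__(self, key, value)
        else:
            self.maps[0][key] = value

scope = AttrChainMap({}, {'x': 1})
scope.x = 2  # lands in the first map, shadowing the second
assert scope['x'] == 2
assert scope.maps == [{'x': 2}, {'x': 1}]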
Example #27
 def splits(cls, config):
     folder = config["data_folder"]
     wanted_words = config["wanted_words"]
     unknown_prob = config["unknown_prob"]
     train_pct = config["train_pct"]
     dev_pct = config["dev_pct"]
     test_pct = config["test_pct"]
     words = {word: i + 2 for i, word in enumerate(wanted_words)}
     words.update({cls.LABEL_SILENCE: 0, cls.LABEL_UNKNOWN: 1})
     sets = [{}, {}, {}]
     unknowns = [0] * 3
     bg_noise_files = []
     bg_files = []
     unknown_files = []
     class_count = {}
     for folder_name in os.listdir(folder):
         path_name = os.path.join(folder, folder_name)
         if os.path.isdir(path_name):
             number_item = len(os.listdir(path_name))
             # print(path_name,number_item)
             if folder_name in words:
                 class_count[str(path_name)] = number_item
     # print('class count', class_count)
     dev_count = int(
         (dev_pct / 100) * np.min([v for v in class_count.values()]))
     # print('dev count' , dev_count)
     for folder_name in os.listdir(folder):
         path_name = os.path.join(folder, folder_name)
         is_bg_noise = False
         is_bg = False
         if folder_name != "_background_noise_" and folder_name != "_background_" and folder_name in words:
             dev_data = list(
                 np.random.choice(np.arange(len(os.listdir(path_name))),
                                  dev_count,
                                  replace=False))
         if os.path.isfile(path_name):
             continue
         if folder_name in words:
             label = words[folder_name]
         elif folder_name == "_background_noise_":
             is_bg_noise = True
         elif folder_name == "_background_":
             is_bg = True
         else:
             label = words[cls.LABEL_UNKNOWN]
         for i, filename in enumerate(os.listdir(path_name)):
             wav_name = os.path.join(path_name, filename)
             if is_bg_noise and os.path.isfile(wav_name):
                 bg_noise_files.append(wav_name)
                 continue
             elif is_bg and os.path.isfile(wav_name):
                 bg_files.append(wav_name)
                 continue
             elif label == words[cls.LABEL_UNKNOWN]:
                 unknown_files.append(wav_name)
                 continue
             if config["group_speakers_by_id"]:
                 hashname = re.sub(r"_nohash_.*$", "", filename)
             max_no_wavs = 2**27 - 1
             bucket = int(hashlib.sha1(hashname.encode()).hexdigest(), 16)
             bucket = (bucket % (max_no_wavs + 1)) * (100. / max_no_wavs)
             test_con = True
             if i in dev_data:
                 tag = DatasetType.DEV
             else:
                 tag = DatasetType.TRAIN
             if test_con and i < 5:
                 sets[DatasetType.TEST.value][wav_name] = label
             sets[tag.value][wav_name] = label
     unknowns[0] = len(unknown_files) - dev_count
     unknowns[1] = dev_count
     random.shuffle(unknown_files)
     a = 0
     for i, dataset in enumerate(sets):
         b = a + unknowns[i]
         unk_dict = {
             u: words[cls.LABEL_UNKNOWN]
             for u in unknown_files[a:b]
         }
         dataset.update(unk_dict)
         a = b
     # print(bg_noise_files)
     # print(bg_files)
     train_cfg = ChainMap(
         dict(bg_noise_files=bg_noise_files, bg_files=bg_files), config)
     test_cfg = ChainMap(
         dict(bg_noise_files=bg_noise_files,
              bg_files=bg_files,
              noise_prob=0), config)
     datasets = (cls(sets[0], DatasetType.TRAIN,
                     train_cfg), cls(sets[1], DatasetType.DEV, test_cfg),
                 cls(sets[2], DatasetType.TEST, test_cfg))
     return datasets