# Module-level imports assumed by this excerpt (the original file is expected
# to provide them; `utils` is the project's own helper module).
import configparser
import distutils.util  # note: distutils was deprecated in Python 3.10 and removed in 3.12
import os
import random
from pprint import pprint

import utils


def _load_parameters(self, parameters_filepath, arguments=None, verbose=True):
    '''
    Load parameters from the ini file if specified, take into account any
    command line argument, and ensure that each parameter is cast to the
    correct type. Command line arguments take precedence over parameters
    specified in the parameter file.
    '''
    # Avoid a mutable default argument for `arguments`.
    if arguments is None:
        arguments = {}
    parameters = {'pretrained_model_folder': '../trained_models/conll_2003_en',
                  'dataset_text_folder': '../data/conll2003/en',
                  'character_embedding_dimension': 25,
                  'character_lstm_hidden_state_dimension': 25,
                  'check_for_digits_replaced_with_zeros': True,
                  'check_for_lowercase': True,
                  'debug': False,
                  'dropout_rate': 0.5,
                  'experiment_name': 'test',
                  'freeze_token_embeddings': False,
                  'gradient_clipping_value': 5.0,
                  'learning_rate': 0.005,
                  'load_only_pretrained_token_embeddings': False,
                  'load_all_pretrained_token_embeddings': False,
                  'main_evaluation_mode': 'conll',
                  'maximum_number_of_epochs': 100,
                  'number_of_cpu_threads': 8,
                  'number_of_gpus': 0,
                  'optimizer': 'sgd',
                  'output_folder': '../output',
                  'patience': 10,
                  'plot_format': 'pdf',
                  'reload_character_embeddings': True,
                  'reload_character_lstm': True,
                  'reload_crf': True,
                  'reload_feedforward': True,
                  'reload_token_embeddings': True,
                  'reload_token_lstm': True,
                  'remap_unknown_tokens_to_unk': True,
                  'spacylanguage': 'en',
                  'tagging_format': 'bioes',
                  'token_embedding_dimension': 100,
                  'token_lstm_hidden_state_dimension': 100,
                  'token_pretrained_embedding_filepath': '../data/word_vectors/glove.6B.100d.txt',
                  'tokenizer': 'spacy',
                  'train_model': True,
                  'use_character_lstm': True,
                  'use_crf': True,
                  'use_pretrained_model': False,
                  'verbose': False,
                  # new arguments
                  'num_layers': 2,
                  'use_deep_lstm': False}

    # If a parameter file is specified, load it
    if len(parameters_filepath) > 0:
        conf_parameters = configparser.ConfigParser()
        conf_parameters.read(parameters_filepath)
        nested_parameters = utils.convert_configparser_to_dictionary(conf_parameters)
        for v in nested_parameters.values():
            parameters.update(v)

    # Ensure that any arguments specified on the command line overwrite
    # parameters specified in the parameter file
    for k, v in arguments.items():
        if arguments[k] != arguments['argument_default_value']:
            parameters[k] = v

    for k, v in parameters.items():
        v = str(v)
        # If the value is a comma-delimited list, choose one element at random.
        if ',' in v:
            v = random.choice(v.split(','))
            parameters[k] = v
        # Ensure that each parameter is cast to the correct type
        if k in ['character_embedding_dimension', 'character_lstm_hidden_state_dimension',
                 'token_embedding_dimension', 'token_lstm_hidden_state_dimension',
                 'patience', 'maximum_number_of_epochs', 'maximum_training_time',
                 'number_of_cpu_threads', 'number_of_gpus', 'num_layers']:
            parameters[k] = int(v)
        elif k in ['dropout_rate', 'learning_rate', 'gradient_clipping_value']:
            parameters[k] = float(v)
        elif k in ['remap_unknown_tokens_to_unk', 'use_character_lstm', 'use_crf',
                   'train_model', 'use_pretrained_model', 'debug', 'verbose',
                   'reload_character_embeddings', 'reload_character_lstm',
                   'reload_token_embeddings', 'reload_token_lstm',
                   'reload_feedforward', 'reload_crf', 'check_for_lowercase',
                   'check_for_digits_replaced_with_zeros', 'freeze_token_embeddings',
                   'load_only_pretrained_token_embeddings',
                   'load_all_pretrained_token_embeddings']:
            parameters[k] = distutils.util.strtobool(v)

    # If loading a pretrained model, set the model hyperparameters according
    # to the pretraining parameters
    if parameters['use_pretrained_model']:
        pretraining_parameters = self._load_parameters(
            parameters_filepath=os.path.join(parameters['pretrained_model_folder'],
                                             'parameters.ini'),
            verbose=False)[0]
        for name in ['use_character_lstm', 'character_embedding_dimension',
                     'character_lstm_hidden_state_dimension', 'token_embedding_dimension',
                     'token_lstm_hidden_state_dimension', 'use_crf']:
            if parameters[name] != pretraining_parameters[name]:
                print('WARNING: parameter {0} was overwritten from {1} to {2} '
                      'to be consistent with the pretrained model'
                      .format(name, parameters[name], pretraining_parameters[name]))
                parameters[name] = pretraining_parameters[name]

    if verbose:
        pprint(parameters)

    # Update conf_parameters to reflect final parameter values.
    # Note: the section layout is recovered from a hard-coded test ini file.
    conf_parameters = configparser.ConfigParser()
    conf_parameters.read(os.path.join('test', 'test-parameters-training.ini'))
    parameter_to_section = utils.get_parameter_to_section_of_configparser(conf_parameters)
    for k, v in parameters.items():
        conf_parameters.set(parameter_to_section[k], k, str(v))

    return parameters, conf_parameters
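# distutils.util.strtobool (used above) was deprecated in Python 3.10 and
# removed in 3.12. A minimal drop-in sketch of it, should a local replacement
# be needed on newer interpreters (hypothetical helper, not part of the
# original module):
def _strtobool(value):
    '''Map common truthy/falsy strings to 1/0, mirroring distutils.util.strtobool.'''
    value = value.strip().lower()
    if value in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    if value in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    raise ValueError('invalid truth value {!r}'.format(value))


# Illustrative call (a sketch; assumes this method lives on the model class and
# that `arguments` carries the argument parser's sentinel under
# 'argument_default_value', as the overwrite loop above expects):
#
#   parameters, conf = self._load_parameters(
#       parameters_filepath='./parameters.ini',
#       arguments={'learning_rate': '0.01', 'argument_default_value': None})
#   # parameters['learning_rate'] == 0.01 -- the CLI value wins over the ini file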
def load_parameters(**kwargs):
    '''
    Load parameters from the ini file if specified, take into account any
    command line argument, and ensure that each parameter is cast to the
    correct type. Command line arguments take precedence over parameters
    specified in the parameter file.
    '''
    param = {}
    param_default = _get_default_param()

    # Use the parameter path if provided, otherwise fall back to the default.
    # (Using .get() avoids the unbound-variable bug of a try/except around
    # kwargs['parameters_filepath'], which left the name undefined when the
    # key was present but falsy.)
    parameters_filepath = kwargs.get('parameters_filepath') or \
        param_default['parameters_filepath']

    param_config, param_file_txt = _get_config_param(parameters_filepath)

    # Parameter file settings should overwrite default settings
    for k, v in param_config.items():
        param[k] = v

    # Command line args should overwrite settings in the parameter file
    for k, v in kwargs.items():
        param[k] = v

    # Any missing args can be set to default
    for k, v in param_default.items():
        if k not in param:
            param[k] = v

    # Clean the data types
    param = _clean_param_dtypes(param)

    # If loading a pretrained model, set to the pretraining hyperparameters
    if param['use_pretrained_model']:
        pretrain_path = os.path.join(param['pretrained_model_folder'], 'parameters.ini')
        if os.path.isfile(pretrain_path):
            pretrain_param, _ = _get_config_param(pretrain_path)
            pretrain_param = _clean_param_dtypes(pretrain_param)
            pretrain_list = ['use_character_lstm', 'character_embedding_dimension',
                             'character_lstm_hidden_state_dimension',
                             'token_embedding_dimension',
                             'token_lstm_hidden_state_dimension', 'use_crf']
            for name in pretrain_list:
                if param[name] != pretrain_param[name]:
                    print("WARNING: parameter '{0}' was overwritten from '{1}' to "
                          "'{2}' for consistency with the pretrained model".format(
                              name, param[name], pretrain_param[name]))
                    param[name] = pretrain_param[name]
        else:
            print('WARNING: pretraining parameter file not found.')

    # Update param_file_txt to reflect the overriding
    param_to_section = utils.get_parameter_to_section_of_configparser(param_file_txt)
    for k, v in param.items():
        try:
            param_file_txt.set(param_to_section[k], k, str(v))
        except (KeyError, configparser.NoSectionError):
            # Parameters without a section in the original ini file are skipped
            pass

    if param['verbose']:
        pprint(param)

    return param, param_file_txt
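# Illustrative usage (a sketch; `_get_default_param`, `_get_config_param`, and
# `_clean_param_dtypes` are assumed to be this module's own helpers, mirroring
# the casting logic shown in `_load_parameters` above):
#
#   param, param_file_txt = load_parameters(
#       parameters_filepath='./parameters.ini',
#       train_model=True,
#       verbose=True)
#   # precedence: defaults < parameters.ini < keyword arguments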