def fill_in_config_paths_for_fancy_output(config_template_path):
    """
    Add paths to train, test, and output directories to a given config
    template file.

    The template is read without validation, the fancy train/test file
    paths and the three output destinations are filled in, and the result
    is written next to the template with the ``.template`` suffix dropped.

    Returns the path to the newly written config file.
    """
    train_dir = join(_my_dir, 'train')
    test_dir = join(_my_dir, 'test')
    output_dir = join(_my_dir, 'output')

    config = _setup_config_parser(config_template_path, validate=False)
    config.set("Input", "train_file", join(train_dir, "fancy_train.jsonlines"))
    config.set("Input", "test_file", join(test_dir, "fancy_test.jsonlines"))

    # All three output destinations point at the same directory.
    for output_key in ("results", "log", "predictions"):
        config.set("Output", output_key, output_dir)

    # "foo.template.cfg" -> "foo.cfg"
    config_prefix = re.search(r'^(.*)\.template\.cfg',
                              config_template_path).group(1)
    new_config_path = '{}.cfg'.format(config_prefix)
    with open(new_config_path, 'w') as new_config_file:
        config.write(new_config_file)

    return new_config_path
def fill_in_config_paths_for_single_file(config_template_path, train_file,
                                         test_file, train_directory='',
                                         test_directory=''):
    """
    Add paths to train and test files, and output directories to a given
    config template file.

    The template is read without validation; file and directory options
    are rewritten relative to the test fixture tree rooted at `_my_dir`,
    and the filled-in config is written next to the template with the
    ``.template`` suffix dropped.  Returns the new config file's path.

    NOTE(review): a function with this same name is defined again later
    in this file; that later definition shadows this one at import time —
    confirm which version callers actually get.
    """
    train_dir = join(_my_dir, 'train')
    test_dir = join(_my_dir, 'test')
    output_dir = join(_my_dir, 'output')

    config = _setup_config_parser(config_template_path, validate=False)
    task = config.get("General", "task")

    config.set("Input", "train_file", join(train_dir, train_file))
    # The test file is only consulted for tasks that actually evaluate
    # or predict on held-out data.
    if task == 'predict' or task == 'evaluate':
        config.set("Input", "test_file", join(test_dir, test_file))

    # Optional directory overrides (empty string means "leave unset").
    if train_directory:
        config.set("Input", "train_directory", join(train_dir, train_directory))
    if test_directory:
        config.set("Input", "test_directory", join(test_dir, test_directory))

    # Decide which [Output] options need to be pointed at the output dir.
    to_fill_in = ['log']
    if task != 'train':
        to_fill_in.append('predictions')
    if task == 'cross_validate':
        # NOTE(review): ConfigParser.get returns a string, so any
        # non-empty value (including "False") is truthy here — confirm
        # the templates only set save_cv_models when it is really wanted.
        if config.get("Output", "save_cv_models"):
            to_fill_in.append('models')
    else:
        to_fill_in.append('models')
    if task == 'evaluate' or task == 'cross_validate':
        to_fill_in.append('results')
    for d in to_fill_in:
        config.set("Output", d, join(output_dir))

    # Cross-validation may name a folds file relative to the train dir.
    if task == 'cross_validate':
        folds_file = config.get("Input", "folds_file")
        if folds_file:
            config.set("Input", "folds_file", join(train_dir, folds_file))

    # "foo.template.cfg" -> "foo.cfg"
    config_prefix = re.search(r'^(.*)\.template\.cfg',
                              config_template_path).groups()[0]
    new_config_path = '{}.cfg'.format(config_prefix)
    with open(new_config_path, 'w') as new_config_file:
        config.write(new_config_file)

    return new_config_path
def fill_in_config_options(config_template_path, values_to_fill_dict,
                           sub_prefix, good_probability_option=False):
    """
    Fill in values in the given config template.

    Only the options whitelisted per section below are copied from
    `values_to_fill_dict` into the parsed template; everything else in
    the dictionary is ignored.  The result is written to a new file named
    after the template prefix plus `sub_prefix`, and that path returned.

    NOTE: a later definition with the same name appears in this file and
    shadows this one — it lacks the `good_probability_option` parameter.
    """
    config = _setup_config_parser(config_template_path, validate=False)

    # Which option may be filled in under which section.
    # Note: (a) `bad_option` and `duplicate_option` are needed
    # for test_config_parsing_invalid_option and
    # test_config_parsing_duplicate_option() respectively.
    # (b) `probability` is deliberately specified in the wrong
    # section for test_config_parsing_option_in_wrong_section().
    general_options = ['experiment_name', 'task']
    input_options = ['train_directory', 'train_file', 'test_directory',
                     'test_file', 'featuresets', 'featureset_names',
                     'feature_hasher', 'hasher_features', 'learners',
                     'sampler', 'shuffle', 'feature_scaling',
                     'learning_curve_cv_folds_list', 'folds_file',
                     'learning_curve_train_sizes', 'fixed_parameters',
                     'num_cv_folds', 'bad_option', 'duplicate_option',
                     'suffix']
    tuning_options = ['grid_search', 'objective',
                      'use_folds_file_for_grid_search', 'grid_search_folds',
                      'pos_label_str', 'param_grids', 'objectives',
                      'duplicate_option']
    output_options = ['results', 'log', 'models', 'metrics', 'predictions',
                      'pipeline', 'save_cv_folds', 'save_cv_models']
    to_fill_in = {'General': general_options,
                  'Input': input_options,
                  'Tuning': tuning_options,
                  'Output': output_options}

    # `probability` goes in the correct section only when requested.
    target_section = 'Output' if good_probability_option else 'Tuning'
    to_fill_in[target_section].append('probability')

    for section, option_names in to_fill_in.items():
        for option_name in option_names:
            if option_name in values_to_fill_dict:
                config.set(section, option_name,
                           values_to_fill_dict[option_name])

    # "foo.template.cfg" -> "foo_<sub_prefix>.cfg"
    config_prefix = re.search(r'^(.*)\.template\.cfg',
                              config_template_path).groups()[0]
    new_config_path = '{}_{}.cfg'.format(config_prefix, sub_prefix)
    with open(new_config_path, 'w') as new_config_file:
        config.write(new_config_file)

    return new_config_path
def fill_in_config_paths(config_template_path):
    """
    Add paths to train, test, and output directories to a given config
    template file.

    The template is read without validation; directory options are
    rewritten relative to the test fixture tree rooted at `_my_dir`
    according to the configured task, and the filled-in config is written
    next to the template with the ``.template`` suffix dropped.

    Returns the path to the newly written config file.
    """
    train_dir = join(_my_dir, 'train')
    test_dir = join(_my_dir, 'test')
    output_dir = join(_my_dir, 'output')

    config = _setup_config_parser(config_template_path, validate=False)
    task = config.get("General", "task")

    config.set("Input", "train_directory", train_dir)

    # Decide which [Output] options need to be pointed at the output dir.
    to_fill_in = ['log']
    if task != 'learning_curve':
        to_fill_in.append('predictions')
    if task not in ['cross_validate', 'learning_curve']:
        to_fill_in.append('models')
    if task in ['cross_validate', 'evaluate', 'learning_curve']:
        to_fill_in.append('results')
    for d in to_fill_in:
        # Fixed: was `join(output_dir)`, a pointless single-argument
        # join that just returns its argument.
        config.set("Output", d, output_dir)

    # Cross-validation may name a folds file relative to the train dir.
    if task == 'cross_validate':
        cv_folds_file = config.get("Input", "cv_folds_file")
        if cv_folds_file:
            config.set("Input", "cv_folds_file",
                       join(train_dir, cv_folds_file))

    if task == 'predict' or task == 'evaluate':
        config.set("Input", "test_directory", test_dir)

    # Set up custom learner path, if relevant.  NOTE(review): this joins
    # unconditionally, so an empty template value becomes `_my_dir` plus
    # a trailing separator — confirm templates always set this option.
    custom_learner_path = config.get("Input", "custom_learner_path")
    custom_learner_abs_path = join(_my_dir, custom_learner_path)
    config.set("Input", "custom_learner_path", custom_learner_abs_path)

    # "foo.template.cfg" -> "foo.cfg"
    config_prefix = re.search(r'^(.*)\.template\.cfg',
                              config_template_path).group(1)
    new_config_path = '{}.cfg'.format(config_prefix)
    with open(new_config_path, 'w') as new_config_file:
        config.write(new_config_file)

    return new_config_path
def fill_in_config_paths_for_single_file(config_template_path, train_file,
                                         test_file, train_directory='',
                                         test_directory=''):
    """
    Add paths to train and test files, and output directories to a given
    config template file.

    Writes the filled-in config next to the template with the
    ``.template`` suffix dropped and returns the new path.

    NOTE(review): this redefines (and therefore shadows) an earlier
    function of the same name in this file — confirm the duplication is
    intentional.
    """
    train_dir = join(_my_dir, 'train')
    test_dir = join(_my_dir, 'test')
    output_dir = join(_my_dir, 'output')

    config = _setup_config_parser(config_template_path, validate=False)
    task = config.get("General", "task")

    config.set("Input", "train_file", join(train_dir, train_file))
    # Only prediction/evaluation tasks consult the test file.
    if task in ('predict', 'evaluate'):
        config.set("Input", "test_file", join(test_dir, test_file))

    # Optional directory overrides (empty string means "leave unset").
    if train_directory:
        config.set("Input", "train_directory",
                   join(train_dir, train_directory))
    if test_directory:
        config.set("Input", "test_directory",
                   join(test_dir, test_directory))

    # Every task logs and predicts; models/results depend on the task.
    output_keys = ['log', 'predictions']
    if task != 'cross_validate':
        output_keys.append('models')
    if task in ('evaluate', 'cross_validate'):
        output_keys.append('results')
    for output_key in output_keys:
        config.set("Output", output_key, output_dir)

    # Cross-validation may name a folds file relative to the train dir.
    if task == 'cross_validate':
        cv_folds_file = config.get("Input", "cv_folds_file")
        if cv_folds_file:
            config.set("Input", "cv_folds_file",
                       join(train_dir, cv_folds_file))

    # "foo.template.cfg" -> "foo.cfg"
    config_prefix = re.search(r'^(.*)\.template\.cfg',
                              config_template_path).group(1)
    new_config_path = '{}.cfg'.format(config_prefix)
    with open(new_config_path, 'w') as new_config_file:
        config.write(new_config_file)

    return new_config_path
def fill_in_config_options(config_template_path, values_to_fill_dict,
                           sub_prefix):
    """
    Fill in values in the given config template.

    Only the options whitelisted per section below are copied from
    `values_to_fill_dict`; everything else in the dictionary is ignored.
    The result is written to a new file named after the template prefix
    plus `sub_prefix`, and that path is returned.

    NOTE(review): this redefines (and therefore shadows) an earlier
    function of the same name in this file that also accepted a
    `good_probability_option` keyword — confirm callers do not rely on
    that parameter.
    """
    config = _setup_config_parser(config_template_path, validate=False)

    # Which option may be filled in under which section.
    # Note: (a) `bad_option` and `duplicate_option` are needed
    # for test_config_parsing_invalid_option and
    # test_config_parsing_duplicate_option() respectively.
    # (b) `probability` is deliberately specified in the wrong
    # section for test_config_parsing_option_in_wrong_section().
    section_options = {
        'General': ['experiment_name', 'task'],
        'Input': ['train_directory', 'train_file', 'test_directory',
                  'test_file', 'featuresets', 'featureset_names',
                  'feature_hasher', 'hasher_features', 'learners',
                  'sampler', 'shuffle', 'feature_scaling',
                  'fixed_parameters', 'num_cv_folds', 'bad_option',
                  'duplicate_option'],
        'Tuning': ['probability', 'grid_search', 'objective',
                   'param_grids', 'objectives', 'duplicate_option'],
        'Output': ['results', 'log', 'models', 'predictions'],
    }

    for section, option_names in section_options.items():
        for option_name in option_names:
            if option_name in values_to_fill_dict:
                config.set(section, option_name,
                           values_to_fill_dict[option_name])

    # "foo.template.cfg" -> "foo_<sub_prefix>.cfg"
    config_prefix = re.search(r'^(.*)\.template\.cfg',
                              config_template_path).group(1)
    new_config_path = '{}_{}.cfg'.format(config_prefix, sub_prefix)
    with open(new_config_path, 'w') as new_config_file:
        config.write(new_config_file)

    return new_config_path
def test_int_labels():
    """
    Testing that SKLL can take integer input.

    This is just to test that SKLL can take int labels in the input
    (rather than floats or strings).  For v1.0.0, it could not because
    the json package doesn't know how to serialize numpy.int64 objects.
    """
    config_template_path = join(_my_dir, 'configs',
                                'test_int_labels_cv.template.cfg')
    config_path = join(_my_dir, 'configs', 'test_int_labels_cv.cfg')
    output_dir = join(_my_dir, 'output')

    # Fill in the input file and point every output option at output_dir.
    config = _setup_config_parser(config_template_path, validate=False)
    config.set("Input", "train_file",
               join(_my_dir, 'other', 'test_int_labels_cv.jsonlines'))
    for output_key in ("results", "log", "predictions"):
        config.set("Output", output_key, output_dir)

    with open(config_path, 'w') as new_config_file:
        config.write(new_config_file)

    run_configuration(config_path, quiet=True)