def test_run_experiment_lr_with_object_no_configdir():
    """Test rsmtool using a Configuration object and no specified configdir."""
    source = 'lr-object-no-path'
    experiment_id = 'lr_object_no_path'

    # locations of the input files, relative to the rsmtool test directory
    old_file_dict = {'train': 'data/files/train.csv',
                     'test': 'data/files/test.csv',
                     'features': 'data/experiments/lr-object-no-path/features.csv'}

    # No configdir is passed to ``Configuration`` below, so the input files
    # are first copied into a temporary directory whose name is prefixed
    # with the current working directory. Using the context manager makes
    # sure the directory is removed once the check is done, even if the
    # check fails, instead of relying on implicit cleanup at garbage
    # collection time.
    with tempfile.TemporaryDirectory(prefix=getcwd()) as temp_dir_name:
        # the returned mapping is indexed with the same keys as
        # ``old_file_dict`` to obtain the paths used in the configuration
        new_file_dict = copy_data_files(temp_dir_name,
                                        old_file_dict,
                                        rsmtool_test_dir)

        config_dict = {"train_file": new_file_dict['train'],
                       "id_column": "ID",
                       "use_scaled_predictions": True,
                       "test_label_column": "score",
                       "train_label_column": "score",
                       "test_file": new_file_dict['test'],
                       "trim_max": 6,
                       "features": new_file_dict['features'],
                       "trim_min": 1,
                       "model": "LinearRegression",
                       "experiment_id": experiment_id,
                       "description": "Using all features with an LinearRegression model."}

        config_obj = Configuration(config_dict)

        check_run_experiment(source,
                             experiment_id,
                             config_obj_or_dict=config_obj)
def test_run_experiment_lr_with_object_and_configdir():
    """Test rsmtool using a Configuration object and specified configdir."""
    source = 'lr-object'
    experiment_id = 'lr_object'

    # the experiment directory is handed to ``Configuration`` as configdir
    experiment_dir = join(rsmtool_test_dir, 'data', 'experiments', source)

    settings = {
        "train_file": "../../files/train.csv",
        "id_column": "ID",
        "use_scaled_predictions": True,
        "test_label_column": "score",
        "train_label_column": "score",
        "test_file": "../../files/test.csv",
        "trim_max": 6,
        "features": "features.csv",
        "trim_min": 1,
        "model": "LinearRegression",
        "experiment_id": "lr_object",
        "description": "Using all features with an LinearRegression model.",
    }

    check_run_experiment(
        source,
        experiment_id,
        config_obj_or_dict=Configuration(settings, configdir=experiment_dir),
    )
def test_run_experiment_lr_with_dictionary():
    """Test rsmtool using a plain dictionary as the configuration input."""
    source = 'lr-dictionary'
    experiment_id = 'lr_dictionary'

    # locations of the input files, relative to the rsmtool test directory
    old_file_dict = {'train': 'data/files/train.csv',
                     'test': 'data/files/test.csv',
                     'features': 'data/experiments/lr-dictionary/features.csv'}

    # The input files are copied into a temporary directory whose name is
    # prefixed with the current working directory. Using the context
    # manager makes sure the directory is removed once the check is done,
    # even if the check fails, instead of relying on implicit cleanup at
    # garbage collection time.
    with tempfile.TemporaryDirectory(prefix=getcwd()) as temp_dir_name:
        # the returned mapping is indexed with the same keys as
        # ``old_file_dict`` to obtain the paths used in the configuration
        new_file_dict = copy_data_files(temp_dir_name,
                                        old_file_dict,
                                        rsmtool_test_dir)

        config_dict = {"train_file": new_file_dict['train'],
                       "id_column": "ID",
                       "use_scaled_predictions": True,
                       "test_label_column": "score",
                       "train_label_column": "score",
                       "test_file": new_file_dict['test'],
                       "trim_max": 6,
                       "features": new_file_dict['features'],
                       "trim_min": 1,
                       "model": "LinearRegression",
                       "experiment_id": experiment_id,
                       "description": "Using all features with an LinearRegression model."}

        # the dictionary itself (not a Configuration object) is passed on
        check_run_experiment(source,
                             experiment_id,
                             config_obj_or_dict=config_dict)
def test_run_experiment_parameterized(*args, **kwargs):
    """Forward the given arguments to ``check_run_experiment``.

    ``given_test_dir`` is added to the keyword arguments when ``TEST_DIR``
    is truthy, and ``suppress_warnings_for`` is added when the first
    positional argument starts with ``'linearsvr'``.
    """
    # NOTE(review): another function with this same name is defined later
    # in this file; confirm which definition is meant to be active.
    if TEST_DIR:
        kwargs.update(given_test_dir=TEST_DIR)

    # suppress known convergence warnings for LinearSVR-based experiments
    # TODO: once SKLL hyperparameters can be passed, replace this code
    first_argument = args[0]
    if first_argument.startswith('linearsvr'):
        kwargs.update(suppress_warnings_for=[ConvergenceWarning])

    check_run_experiment(*args, **kwargs)
def test_run_experiment_lr_with_object_and_filepath():
    """Test for a rare use case where an old Configuration object is passed."""
    source = 'lr-object'
    experiment_id = 'lr_object'

    # path to the JSON file named after the experiment ID, located in the
    # directory of this experiment
    json_name = '{}.json'.format(experiment_id)
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source, json_name)

    settings = {
        "train_file": "../../files/train.csv",
        "id_column": "ID",
        "use_scaled_predictions": True,
        "test_label_column": "score",
        "train_label_column": "score",
        "test_file": "../../files/test.csv",
        "trim_max": 6,
        "features": "features.csv",
        "trim_min": 1,
        "model": "LinearRegression",
        "experiment_id": "lr_object",
        "description": "Using all features with an LinearRegression model.",
    }
    config_obj = Configuration(settings)

    # setting ``filepath`` triggers a deprecation warning, which is
    # silenced only for the duration of the assignment
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning)
        config_obj.filepath = config_file

    # the current code always assigns a value to ``configdir``, so the
    # attribute has to be removed explicitly
    del config_obj.configdir

    check_run_experiment(source, experiment_id, config_obj_or_dict=config_obj)
def test_run_experiment_parameterized(*args, **kwargs):
    """Forward the given arguments to ``check_run_experiment``.

    ``given_test_dir`` is added to the keyword arguments when ``TEST_DIR``
    is truthy.
    """
    # NOTE(review): another function with this same name is defined
    # earlier in this file; confirm which definition is meant to be active.
    if TEST_DIR:
        kwargs.update(given_test_dir=TEST_DIR)

    check_run_experiment(*args, **kwargs)