import os
import warnings

from glob import glob
from os.path import basename, dirname, exists, join

from nose.tools import assert_equal

from rsmtool.comparer import Comparer
from rsmtool.configuration_parser import ConfigurationParser
from rsmtool.reporter import Reporter

# shared test helpers; the exact import paths are an assumption and may
# differ across rsmtool versions
from rsmtool.test_utils import (check_file_output,
                                check_generated_output,
                                check_report,
                                check_scaled_coefficients,
                                do_run_experiment,
                                do_run_prediction,
                                rsmtool_test_dir)


def test_run_experiment_with_warnings():

    source = 'lr-with-warnings'
    experiment_id = 'lr_with_warnings'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))

    do_run_experiment(source, experiment_id, config_file)

    html_file = join('test_outputs', source, 'report',
                     experiment_id + '_report.html')
    report_warnings = collect_warning_messages_from_report(html_file)

    syntax_warnings = [
        msg for msg in report_warnings if 'SyntaxWarning:' in msg
    ]
    deprecation_warnings = [
        msg for msg in report_warnings if 'DeprecationWarning:' in msg
    ]
    unicode_warnings = [
        msg for msg in report_warnings if 'UnicodeWarning:' in msg
    ]
    runtime_warnings = [
        msg for msg in report_warnings if 'RuntimeWarning:' in msg
    ]
    user_warnings = [msg for msg in report_warnings if 'UserWarning:' in msg]

    assert_equal(len(syntax_warnings), 1)
    assert_equal(len(deprecation_warnings), 2)
    assert_equal(len(unicode_warnings), 1)
    assert_equal(len(runtime_warnings), 1)
    assert_equal(len(user_warnings), 1)
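

# `collect_warning_messages_from_report` is used above but was not part of
# this listing. A minimal sketch, assuming the report is a
# notebook-converted HTML file that renders Python warnings in stderr
# output cells and that BeautifulSoup is installed; the real helper in the
# rsmtool test suite may differ.
def collect_warning_messages_from_report(html_file):
    """Collect the warning messages embedded in an HTML report."""
    from bs4 import BeautifulSoup

    with open(html_file, 'r') as htmlf:
        soup = BeautifulSoup(htmlf.read(), 'html.parser')

    warning_messages = []
    # nbconvert renders stderr output in divs with the 'output_stderr' class
    for div in soup.find_all('div', {'class': 'output_stderr'}):
        # a single stderr block may contain several warning lines
        warning_messages.extend([line for line in div.text.splitlines()
                                 if 'Warning:' in line])
    return warning_messages
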
def test_run_experiment_lr_with_cfg():
    # basic experiment with a LinearRegression model

    source = 'lr-cfg'
    experiment_id = 'lr_cfg'
    config_file = join(rsmtool_test_dir,
                       'data',
                       'experiments',
                       source,
                       '{}.cfg'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
    output_dir = join('test_outputs', source, 'output')
    expected_output_dir = join(rsmtool_test_dir, 'data', 'experiments', source, 'output')
    html_report = join('test_outputs', source, 'report', '{}_report.html'.format(experiment_id))

    csv_files = glob(join(output_dir, '*.csv'))
    for csv_file in csv_files:
        csv_filename = basename(csv_file)
        expected_csv_file = join(expected_output_dir, csv_filename)

        if exists(expected_csv_file):
            yield check_file_output, csv_file, expected_csv_file

    yield check_generated_output, csv_files, experiment_id, 'rsmtool'
    yield check_scaled_coefficients, source, experiment_id
    yield check_report, html_report
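
# A note on the pattern above: the bare `yield check_function, arg1, ...`
# statements make this a nose-style generator test; nose collects each
# yielded tuple and calls check_function(arg1, ...) as a separate test
# case, so one experiment run fans out into one result per output check.
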
def test_run_experiment_empwtdropneg():

    # rsmtool experiment with no longer supported empWtDropNeg model
    source = 'empwtdropneg'
    experiment_id = 'empWtDropNeg'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_with_truncations_no_features_field():

    # rsmtool experiment with truncations, but no feature field
    source = 'lr-with-truncations-no-features-field'
    experiment_id = 'lr_with_truncations_no_features_field'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_with_truncations_no_features_columns():

    # rsmtool experiment with truncations, but no min/max columns in feature file
    source = 'lr-with-truncations-no-features-columns'
    experiment_id = 'lr_with_truncations_no_features_columns'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_with_sc1_as_feature_name():

    # rsmtool experiment with sc1 used as the name of a feature
    source = 'lr-with-sc1-as-feature-name'
    experiment_id = 'lr_with_sc1_as_feature_name'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_duplicate_feature_names():

    # rsmtool experiment with duplicate feature names
    source = 'lr-with-duplicate-feature-names'
    experiment_id = 'lr_with_duplicate_feature_names'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_wrong_model_name():

    # rsmtool experiment with incorrect model name
    source = 'wrong-model-name'
    experiment_id = 'wrong_model_name'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_requested_feature_zero_sd():

    # rsmtool experiment when a requested feature has zero sd
    source = 'lr-with-requested-feature-with-zero-sd'
    experiment_id = 'lr_with_requested_feature_with_zero_sd'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_wrong_feature_file_path():
    # basic experiment with the path in features field pointing to
    # a non-existing file
    source = 'lr-wrong-path-features'
    experiment_id = 'lr'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_with_repeated_ids():

    # rsmtool experiment with non-unique ids
    source = 'lr-with-repeated-ids'
    experiment_id = 'lr_with_repeated_ids'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_h2_column_and_feature():

    # rsmtool experiment that has second rater column but
    # the same column as a model feature
    source = 'lr-with-h2-and-feature'
    experiment_id = 'lr_with_h2_and_feature'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_length_column_and_feature_list():
    # experiment with feature as list instead of file name
    # and length included in feature list and as length column

    source = 'lr-with-length-and-feature-list'
    experiment_id = 'lr_with_length_and_feature'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_subset_feature_file_and_feature_file():
    # basic experiment with LinearRegression model and a feature file and
    # also a subset file. This is not allowed and so should raise a ValueError.

    source = 'lr-with-feature-subset-file-and-feature-file'
    experiment_id = 'lr_with_feature_subset_file'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
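
# The configuration above is invalid and, per the comment, should raise a
# ValueError. These error-path snippets do not show how the failure is
# asserted; a hedged sketch of a hypothetical helper that makes the
# expectation explicit using nose's assert_raises:
def check_experiment_raises(source, experiment_id, expected_exception):
    # hypothetical helper, not part of the original suite: runs an
    # experiment and asserts that it raises the expected exception
    from nose.tools import assert_raises
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    assert_raises(expected_exception, do_run_experiment,
                  source, experiment_id, config_file)
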
def test_run_experiment_lr_with_length_as_feature_name():

    # rsmtool experiment with 'length' used as feature name
    # when a length analysis is requested using a different feature
    source = 'lr-with-length-as-feature-name'
    experiment_id = 'lr_with_length_as_feature_name'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_with_subgroup_as_feature_name():

    # rsmtool experiment with a subgroup name used as feature
    # when the user also requests subgroup analysis with that subgroup

    source = 'lr-with-subgroup-as-feature-name'
    experiment_id = 'lr_with_subgroup_as_feature_name'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_same_h1_and_h2():

    # rsmtool experiment that has label column
    # and second rater column set the same

    source = 'lr-same-h1-and-h2'
    experiment_id = 'lr_same_h1_and_h2'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_with_sc2_as_feature_name():

    # rsmtool experiment with sc2 used as a feature name
    # when the user also requests h2 analysis using a different
    # column
    source = 'lr-with-sc2-as-feature-name'
    experiment_id = 'lr_with_sc2_as_feature_name'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_all_non_numeric_scores():

    # rsmtool experiment with all values for `sc1`
    # being non-numeric and all getting filtered out
    # which should raise an exception

    source = 'lr-with-all-non-numeric-scores'
    experiment_id = 'lr_with_all_non_numeric_scores'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_one_fully_non_numeric_feature():

    # rsmtool experiment with all values for one of the
    # features being non-numeric and all getting filtered out
    # which should raise an exception

    source = 'lr-with-one-fully-non-numeric-feature'
    experiment_id = 'lr_with_one_fully_non_numeric_feature'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_none_flagged():

    # rsmtool experiment where all responses have the bad flag
    # value and so they all get filtered out which should
    # raise an exception

    source = 'lr-with-none-flagged'
    experiment_id = 'lr_with_none_flagged'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_feature_json():
    # basic experiment with a LinearRegression model but using
    # feature json file

    source = 'lr-feature-json'
    experiment_id = 'lr'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))

    # run this experiment but suppress the expected deprecation warnings
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning)
        do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_old_config():
    # basic experiment with a LinearRegression model but using an
    # old style configuration file

    source = 'lr-old-config'
    experiment_id = 'empWt'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))

    # run this experiment but suppress the expected deprecation warnings
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning)
        do_run_experiment(source, experiment_id, config_file)
def test_run_experiment_lr_with_object():
    # basic experiment with a LinearRegression model

    source = 'lr-object'
    experiment_id = 'lr_object'

    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))

    config_dict = {
        "train_file": "../../files/train.csv",
        "id_column": "ID",
        "use_scaled_predictions": True,
        "test_label_column": "score",
        "train_label_column": "score",
        "test_file": "../../files/test.csv",
        "trim_max": 6,
        "features": "features.csv",
        "trim_min": 1,
        "model": "LinearRegression",
        "experiment_id": "lr_object",
        "description": "Using all features with an LinearRegression model."
    }

    config_parser = ConfigurationParser()
    config_parser.load_config_from_dict(config_dict)
    config_obj = config_parser.normalize_validate_and_process_config()
    # record where the original config file lives so that relative paths in
    # the configuration resolve correctly
    config_obj.filepath = config_file

    do_run_experiment(source, experiment_id, config_obj)
    output_dir = join('test_outputs', source, 'output')
    expected_output_dir = join(rsmtool_test_dir, 'data', 'experiments', source,
                               'output')
    html_report = join('test_outputs', source, 'report',
                       '{}_report.html'.format(experiment_id))

    csv_files = glob(join(output_dir, '*.csv'))
    for csv_file in csv_files:
        csv_filename = basename(csv_file)
        expected_csv_file = join(expected_output_dir, csv_filename)

        if exists(expected_csv_file):
            yield check_file_output, csv_file, expected_csv_file

    yield check_generated_output, csv_files, experiment_id, 'rsmtool'
    yield check_scaled_coefficients, source, experiment_id
    yield check_report, html_report
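

# For reference, a Configuration object does not need the test harness; a
# minimal sketch, assuming rsmtool's top-level
# `run_experiment(config_file_or_obj, output_dir)` entry point accepts a
# Configuration object in place of a file path (the wrapper name and output
# directory are illustrative):
def run_config_object_directly(config_obj, output_dir='test_outputs/lr-object'):
    from rsmtool import run_experiment
    run_experiment(config_obj, output_dir)
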
def test_run_experiment_lr_rsmtool_and_rsmpredict():

    # this test is to make sure that both rsmtool
    # and rsmpredict generate the same files

    source = 'lr-rsmtool-rsmpredict'
    experiment_id = 'lr_rsmtool_rsmpredict'
    rsmtool_config_file = join(rsmtool_test_dir,
                               'data',
                               'experiments',
                               source,
                               '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, rsmtool_config_file)
    rsmpredict_config_file = join(rsmtool_test_dir,
                                  'data',
                                  'experiments',
                                  source,
                                  'rsmpredict.json')
    do_run_prediction(source, rsmpredict_config_file)
    output_dir = join('test_outputs', source, 'output')
    expected_output_dir = join(rsmtool_test_dir, 'data', 'experiments', source, 'output')
    csv_files = glob(join(output_dir, '*.csv'))
    html_report = join('test_outputs', source, 'report', '{}_report.html'.format(experiment_id))

    # check the results for rsmtool
    for csv_file in csv_files:
        csv_filename = basename(csv_file)
        expected_csv_file = join(expected_output_dir, csv_filename)

        if exists(expected_csv_file):
            yield check_file_output, csv_file, expected_csv_file

    yield check_scaled_coefficients, source, experiment_id
    yield check_generated_output, csv_files, experiment_id, 'rsmtool'
    yield check_report, html_report

    # check that the rsmpredict generated the same results
    for csv_pair in [('predictions.csv',
                      '{}_pred_processed.csv'.format(experiment_id)),
                     ('preprocessed_features.csv',
                      '{}_test_preprocessed_features.csv'.format(experiment_id))]:
        output_file = join(output_dir, csv_pair[0])
        expected_output_file = join(expected_output_dir, csv_pair[1])

        yield check_file_output, output_file, expected_output_file
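

# `check_file_output`, used throughout, compares a generated CSV against its
# expected counterpart. A minimal sketch of such a check, assuming
# pandas-comparable output; the real helper in rsmtool.test_utils also
# handles rounding and dtype quirks, and the name below is illustrative:
def check_csvs_roughly_equal(generated_file, expected_file, tolerance=1e-4):
    import pandas as pd
    from pandas.testing import assert_frame_equal

    df_generated = pd.read_csv(generated_file)
    df_expected = pd.read_csv(expected_file)
    # allow small floating-point differences between runs
    assert_frame_equal(df_generated, df_expected,
                       check_exact=False, atol=tolerance)
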

# the method below belongs to a test class; the class statement was missing
# from this listing, so the class name is an assumption
class TestComparer:

    def test_load_rsmtool_output(self):
        source = 'lr-subgroups-with-h2'
        experiment_id = 'lr_subgroups_with_h2'
        test_dir = dirname(__file__)
        config_file = join(test_dir, 'data', 'experiments', source,
                           '{}.json'.format(experiment_id))
        do_run_experiment(source, experiment_id, config_file)
        output_dir = join('test_outputs', source, 'output')
        figure_dir = join('test_outputs', source, 'figure')

        comparer = Comparer()
        csvs, figs, file_format = comparer.load_rsmtool_output(
            output_dir, figure_dir, experiment_id, 'scale', ['QUESTION', 'L1'])

        expected_csv_keys = [
            'df_coef', 'df_confmatrix', 'df_consistency', 'df_degradation',
            'df_descriptives', 'df_disattenuated_correlations',
            'df_disattenuated_correlations_by_L1',
            'df_disattenuated_correlations_by_L1_overview',
            'df_disattenuated_correlations_by_QUESTION',
            'df_disattenuated_correlations_by_QUESTION_overview', 'df_eval',
            'df_eval_by_L1', 'df_eval_by_L1_m_sd', 'df_eval_by_L1_overview',
            'df_eval_by_QUESTION', 'df_eval_by_QUESTION_m_sd',
            'df_eval_by_QUESTION_overview', 'df_eval_for_degradation',
            'df_feature_cors', 'df_mcor_sc1', 'df_mcor_sc1_L1_overview',
            'df_mcor_sc1_QUESTION_overview', 'df_mcor_sc1_by_L1',
            'df_mcor_sc1_by_QUESTION', 'df_mcor_sc1_overview', 'df_model_fit',
            'df_outliers', 'df_pca', 'df_pcavar', 'df_pcor_sc1',
            'df_pcor_sc1_L1_overview', 'df_pcor_sc1_QUESTION_overview',
            'df_pcor_sc1_by_L1', 'df_pcor_sc1_by_QUESTION',
            'df_pcor_sc1_overview', 'df_percentiles', 'df_score_dist',
            'df_scores', 'df_train_features'
        ]

        expected_fig_keys = [
            'betas', 'eval_barplot_by_L1', 'eval_barplot_by_QUESTION',
            'feature_boxplots_by_L1_svg', 'feature_boxplots_by_QUESTION_svg',
            'feature_distplots', 'pca_scree_plot'
        ]

        assert_equal(file_format, 'csv')
        assert_equal(expected_csv_keys, sorted(csvs.keys()))
        assert_equal(expected_fig_keys, sorted(figs.keys()))
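

# a hedged usage sketch: `load_rsmtool_output` returns dictionaries mapping
# keys like 'df_eval' to pandas DataFrames (and figure keys to file paths),
# so the loaded output can be inspected directly; the helper name below is
# illustrative
def summarize_rsmtool_output(csvs):
    for key in sorted(csvs):
        # each value is a DataFrame, so .shape gives (rows, columns)
        print('{}: {} rows x {} columns'.format(key, *csvs[key].shape))
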
def test_run_experiment_lr_with_notebook_rerun():
    # basic experiment with a LinearRegression model; the notebook is
    # re-run after the experiment, after `RSM_REPORT_DIR` is deleted,
    # to ensure that the `.environ.json` file can still be located

    source = 'lr-with-notebook-rerun'
    experiment_id = 'lr'
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)

    report_ipynb = join('test_outputs', source, 'report',
                        '{}_report.ipynb'.format(experiment_id))
    report_html = join('test_outputs', source, 'report',
                       '{}_report.html'.format(experiment_id))

    del os.environ['RSM_REPORT_DIR']

    Reporter.convert_ipynb_to_html(report_ipynb, report_html)
    check_report(report_html)
def test_run_experiment_lr_with_notebook_rerun_fail():
    # basic experiment with a LinearRegression model; the notebook is
    # re-run after the experiment, after both `RSM_REPORT_DIR` and
    # `.environ.json` are deleted, so the notebook execution will fail

    source = 'lr-with-notebook-rerun-fail'
    experiment_id = 'lr'
    config_file = join(rsmtool_test_dir,
                       'data',
                       'experiments',
                       source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)

    report_env = join('test_outputs', source, 'report', '.environ.json')
    report_ipynb = join('test_outputs', source, 'report', '{}_report.ipynb'.format(experiment_id))
    report_html = join('test_outputs', source, 'report', '{}_report.html'.format(experiment_id))

    del os.environ['RSM_REPORT_DIR']
    os.remove(report_env)

    Reporter.convert_ipynb_to_html(report_ipynb, report_html)
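

# The conversion above is expected to fail because `.environ.json` has been
# removed; this listing does not show how that failure is asserted. A hedged
# sketch of a hypothetical helper (the concrete exception type depends on how
# Reporter surfaces notebook execution errors, so only a generic Exception is
# asserted):
def check_ipynb_conversion_fails(report_ipynb, report_html):
    from nose.tools import assert_raises
    assert_raises(Exception, Reporter.convert_ipynb_to_html,
                  report_ipynb, report_html)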