Exemplo n.º 1
0
def test_iqr_filteringa(thermostat_emg_aux_constant_on_outlier):
    """Check that IQR filtering reproduces the statistics of outlier-free data.

    The summary statistics are computed twice: once with the outlier
    thermostat present and once with it removed by hand.  Every ``*_IQFLT``
    field of the first run must equal the matching ``*_NOIQ`` field of the
    second run (or both must be NaN).

    Args:
        thermostat_emg_aux_constant_on_outlier: fixture yielding the
            thermostats under test, including the outlier thermostat.
    """
    outlier_id = 'thermostat_single_emg_aux_constant_on_outlier'

    thermostats = list(thermostat_emg_aux_constant_on_outlier)
    # Run the metrics / statistics with the outlier thermostat in place
    iqflt_metrics = multiple_thermostat_calculate_epa_field_savings_metrics(thermostats)
    iqflt_output_dataframe = pd.DataFrame(iqflt_metrics, columns=COLUMNS)
    iqflt_summary_statistics = compute_summary_statistics(iqflt_output_dataframe)

    # Remove the outlier thermostat.  A filtered copy is built instead of
    # popping by index so that every thermostat (including the last one) is
    # examined and no element is skipped after a removal.
    thermostats = [
        thermostat for thermostat in thermostats
        if thermostat.thermostat_id != outlier_id
    ]

    # Re-run the metrics / statistics with the outlier thermostat removed
    noiq_metrics = multiple_thermostat_calculate_epa_field_savings_metrics(thermostats)
    noiq_output_dataframe = pd.DataFrame(noiq_metrics, columns=COLUMNS)
    noiq_summary_statistics = compute_summary_statistics(noiq_output_dataframe)

    # Verify that the IQFLT removed the outliers by comparing this with the
    # metrics with the outlier thermostat already removed.  All columns are
    # visited, including the final one.
    for column, iqflt_stats in enumerate(iqflt_summary_statistics):
        noiq_stats = noiq_summary_statistics[column]
        for field_iqflt in (x for x in iqflt_stats if '_IQFLT' in x):
            field_noiq = field_iqflt.replace('IQFLT', 'NOIQ')
            left_side = iqflt_stats[field_iqflt]
            right_side = noiq_stats[field_noiq]

            # NaN never compares equal to itself, so it is handled separately.
            if np.isnan(left_side) or np.isnan(right_side):
                assert np.isnan(left_side) and np.isnan(right_side)
            else:
                assert left_side == right_side
Exemplo n.º 2
0
def test_compute_summary_statistics(combined_dataframe):
    """Check the size of each summary returned by the default computation."""
    summary_statistics = compute_summary_statistics(combined_dataframe)
    assert [len(s) for s in summary_statistics] == [
            49, 49, 49, 49,
            9105, 901, 9105, 901,
            ]


# The two tests below were previously nested inside the test above, so pytest
# never collected or ran them; they are module-level tests again.
def test_compute_summary_statistics_advanced(combined_dataframe):
    """Check the size of each summary returned with advanced filtering on."""
    summary_statistics = compute_summary_statistics(combined_dataframe,
            advanced_filtering=True)
    assert [len(s) for s in summary_statistics] == [
            49, 49, 49, 49, 49, 49, 49, 49,
            9105, 901, 9105, 901, 9105, 901, 9105, 901,
            ]


def test_summary_statistics_to_csv(combined_dataframe):
    """Write the summary statistics to a CSV file and check the result.

    The writer must return a DataFrame, and the file read back with pandas
    must have the expected shape.
    """
    import os  # local import keeps this test self-contained

    summary_statistics = compute_summary_statistics(combined_dataframe)

    # mkstemp() hands back an already-open OS-level descriptor; close it so
    # it is not leaked, and delete the file when the test is finished.
    fd, fname = tempfile.mkstemp()
    os.close(fd)
    try:
        product_id = "FAKE"
        stats_df = summary_statistics_to_csv(summary_statistics, fname, product_id)
        assert isinstance(stats_df, pd.DataFrame)

        stats_df_reread = pd.read_csv(fname)
        assert stats_df_reread.shape == (9241, 9)
    finally:
        os.remove(fname)
Exemplo n.º 3
0
def test_compute_summary_statistics_advanced(combined_dataframe):
    """Check the size of each summary returned with advanced filtering on."""
    results = compute_summary_statistics(
        combined_dataframe, advanced_filtering=True)

    # Eight entries of 21 items followed by four (593, 397) pairs.
    expected_sizes = [21] * 8 + [593, 397] * 4
    observed_sizes = list(map(len, results))

    assert observed_sizes == expected_sizes
Exemplo n.º 4
0
def main():
    """Run the example pipeline end to end.

    Configures logging from ``logging.json``, loads the thermostats listed in
    ``../tests/data/metadata.csv``, computes their metrics and writes them to
    ``thermostat_example_output.csv`` in the current directory, then writes
    the plain and advanced-filtering summary statistics as CSV files into
    the data directory.
    """
    logging.basicConfig()

    # Example logging configuration for file and console output:
    #   logging.json       - normal logging example
    #   logging_noisy.json - turns on all debugging information
    #   logging_quiet.json - only logs error messages
    with open("logging.json", "r") as config_file:
        logging_settings = json.load(config_file)
    logging.config.dictConfig(logging_settings)

    # Uses the 'epathermostat' logging
    log = logging.getLogger('epathermostat')
    log.debug("Starting...")

    # True logs additional warning messages; False only displays them on
    # the console.
    logging.captureWarnings(True)

    data_dir = os.path.join("..", "tests", "data")
    metadata_path = os.path.join(data_dir, "metadata.csv")

    # To save the weather cache to local disk files, pass
    # save_cache=True, cache_path='/tmp/epa_weather_files/' to from_csv.
    #
    # verbose=True overrides logging to display the imported thermostats;
    # set it to False to use the logging level instead.
    loaded_thermostats = from_csv(metadata_path, verbose=True)

    output_dir = "."
    computed_metrics = multiple_thermostat_calculate_epa_field_savings_metrics(
        loaded_thermostats)

    metrics_path = os.path.join(output_dir, "thermostat_example_output.csv")
    metrics_frame = metrics_to_csv(computed_metrics, metrics_path)

    # Both sets of statistics are computed before either file is written.
    plain_stats = compute_summary_statistics(metrics_frame)
    advanced_stats = compute_summary_statistics(
        metrics_frame, advanced_filtering=True)

    product_id = "test_product"

    plain_stats_path = os.path.join(data_dir, "thermostat_example_stats.csv")
    summary_statistics_to_csv(plain_stats, plain_stats_path, product_id)

    advanced_stats_path = os.path.join(
        data_dir, "thermostat_example_stats_advanced.csv")
    summary_statistics_to_csv(advanced_stats, advanced_stats_path, product_id)
Exemplo n.º 5
0
    def test_compute_summary_statistics_advanced(combined_dataframe):
        """Check the size of each summary returned with advanced filtering on."""
        summary_statistics = compute_summary_statistics(combined_dataframe,
                advanced_filtering=True)
        # Expected number of entries in each returned summary, in order.
        assert [len(s) for s in summary_statistics] == [
                49, 49, 49, 49, 49, 49, 49, 49,
                9105, 901, 9105, 901, 9105, 901, 9105, 901,
                ]

        # NOTE(review): this function is defined inside the test above and is
        # never called here, so its body does not run.  It looks like a
        # module-level test whose indentation was lost in a paste -- confirm
        # and move it to top level if so.
        def test_summary_statistics_to_csv(combined_dataframe):
            # Only computes the statistics; the result is not used or checked.
            summary_statistics = compute_summary_statistics(combined_dataframe)
Exemplo n.º 6
0
def test_summary_statistics_to_csv(combined_dataframe):
    """Write the summary statistics to a CSV file and check the result.

    The writer must return a DataFrame, and the file read back with pandas
    must have shape ``(687, 9)``.
    """
    import os  # local import keeps this test self-contained

    summary_statistics = compute_summary_statistics(combined_dataframe)

    # mkstemp() hands back an already-open OS-level descriptor; close it so
    # it is not leaked, and delete the file when the test is finished.
    fd, fname = tempfile.mkstemp()
    os.close(fd)
    try:
        product_id = "FAKE"
        stats_df = summary_statistics_to_csv(summary_statistics, fname, product_id)
        assert isinstance(stats_df, pd.DataFrame)

        stats_df_reread = pd.read_csv(fname)
        assert stats_df_reread.shape == (687, 9)
    finally:
        os.remove(fname)
Exemplo n.º 7
0
def test_summary_statistics_to_csv(combined_dataframe):
    """Write the labelled summary statistics to CSV and check the first line.

    The writer must return a DataFrame, and the first line of the written
    file must split into ``12 * 88 + 15`` comma-separated fields.
    """
    import os  # local import keeps this test self-contained

    summary_statistics = compute_summary_statistics(combined_dataframe, "label")

    # mkstemp() hands back an already-open OS-level descriptor; close it so
    # it is not leaked, and delete the file when the test is finished.
    fd, fname = tempfile.mkstemp()
    os.close(fd)
    try:
        stats_df = summary_statistics_to_csv(summary_statistics, fname)
        assert isinstance(stats_df, pd.DataFrame)

        with open(fname, 'r') as f:
            columns = f.readline().split(",")
        assert len(columns) == 12 * 88 + 15
    finally:
        os.remove(fname)
Exemplo n.º 8
0
def test_compute_summary_statistics(combined_dataframe):
    """Check the labelled summary statistics for size, label and finite values.

    Two summaries are expected.  Every value of the first one other than its
    ``"label"`` entry must be non-null and not infinite.
    """
    results = compute_summary_statistics(combined_dataframe, "label")
    assert len(results) == 2

    first_summary = results[0]
    assert len(first_summary) == 12 * 61 + 5
    assert len(results[1]) == 12 * 47 + 5
    assert first_summary["label"] == "label_heating"

    for name, entry in first_summary.items():
        # The label is text, so the numeric checks do not apply to it.
        if name == "label":
            continue
        assert pd.notnull(entry)
        assert not np.isinf(entry)
def test_summary_statistics_to_csv(combined_dataframe):
    """Write the summary statistics to a CSV file and check the result.

    The writer must return a DataFrame, and the file read back with pandas
    must have shape ``(687, 9)``.
    """
    import os  # local import keeps this test self-contained

    summary_statistics = compute_summary_statistics(combined_dataframe)

    # mkstemp() hands back an already-open OS-level descriptor; close it so
    # it is not leaked, and delete the file when the test is finished.
    fd, fname = tempfile.mkstemp()
    os.close(fd)
    try:
        product_id = "FAKE"
        stats_df = summary_statistics_to_csv(summary_statistics, fname, product_id)
        assert isinstance(stats_df, pd.DataFrame)

        stats_df_reread = pd.read_csv(fname)
        assert stats_df_reread.shape == (687, 9)
    finally:
        os.remove(fname)
Exemplo n.º 10
0
def test_compute_summary_statistics(combined_dataframe):
    """Check the size of each summary returned by the default computation."""
    results = compute_summary_statistics(combined_dataframe)

    # Four entries of 21 items followed by two (593, 397) pairs.
    expected_sizes = [21] * 4 + [593, 397] * 2
    observed_sizes = list(map(len, results))

    assert observed_sizes == expected_sizes
Exemplo n.º 11
0
def test_compute_summary_statistics_advanced(combined_dataframe):
    """Check the size of each summary returned with advanced filtering on."""
    results = compute_summary_statistics(
        combined_dataframe, advanced_filtering=True)

    # Eight entries of 21 items, then the (593, 397) pair repeated four times.
    small_sizes = [21 for _ in range(8)]
    large_sizes = [593, 397, 593, 397, 593, 397, 593, 397]

    observed_sizes = []
    for summary in results:
        observed_sizes.append(len(summary))

    assert observed_sizes == small_sizes + large_sizes
Exemplo n.º 12
0
 def test_summary_statistics_to_csv(combined_dataframe):
     """Compute the summary statistics for ``combined_dataframe``."""
     # NOTE(review): despite the name, nothing is written to CSV or asserted
     # here -- the body looks truncated; confirm against the full test.
     summary_statistics = compute_summary_statistics(combined_dataframe)
Exemplo n.º 13
0
def test_compute_summary_statistics(combined_dataframe):
    """Check the size of each summary returned by the default computation."""
    expected_sizes = (21, 21, 21, 21, 593, 397, 593, 397)

    observed_sizes = tuple(
        len(summary)
        for summary in compute_summary_statistics(combined_dataframe)
    )

    assert observed_sizes == expected_sizes