def test_error_metrics(self, mock_comparison_dataframe):
    """Check RMSE, per-column root squared error, and absolute pct error
    against the canned comparison dataframe fixture."""
    # Accuracy's constructor takes seven collaborators; none matter here
    # because comparison_dataframe is overridden directly below.
    accuracy = Accuracy(*(Mock() for _ in range(7)))
    accuracy.comparison_dataframe = self._mock_comparison_dataframe()

    self.assertEqual(accuracy.root_mean_squared_error(), (1.0, 1.0))

    rse_means = accuracy.root_squared_error().mean().tolist()
    self.assertListEqual(rse_means, [1.0, 1.0])

    pct_means = accuracy.absolute_pct_error().mean().tolist()
    self.assertListEqual(pct_means, [2.0, 0.66666666666666663])
def test_error_report(self, mock_comparison_dataframe, mock_from_data_dir):
    """Exercise Accuracy.error_report: verify the per-puma, per-variable,
    and total error frames for ABSOLUTE_PCT_ERROR, and that an unknown
    statistic name raises."""
    accuracy = Accuracy(Mock(), Mock(), Mock(), Mock(), Mock(), Mock(), Mock())
    accuracy.comparison_dataframe = self._mock_comparison_dataframe()
    accuracy.from_data_dir.return_value = accuracy

    state_puma = dict()
    state_puma['20'] = ['00500', '00602', '00604']
    state_puma['29'] = ['00901', '00902']
    expected_columns = ['marginal-pums', 'marginal-doppelganger']

    df_puma, df_variable, df_total =\
        accuracy.error_report(
            state_puma, 'fake_dir',
            marginal_variables=['num_people', 'num_vehicles', 'age'],
            statistic=ErrorStat.ABSOLUTE_PCT_ERROR
        )

    # Test df_total
    df_total_expected = pd.Series([2.00000, 0.666667], index=expected_columns)
    self.assertTrue(all((df_total - df_total_expected) < 1))

    # Test df_puma: each of the 5 pumas carries the same (2.0, 2/3) pair
    expected_puma_data = np.reshape([2.0, 2 / 3.0] * 5, (5, 2))
    df_expected_puma = pd.DataFrame(data=expected_puma_data,
                                    index=self._mock_state_puma(),
                                    columns=expected_columns)
    self.assertTrue((df_expected_puma == df_puma).all().all())

    # Test df_variable: each of the 12 variable bins carries the same pair
    expected_variable_data = np.reshape([2.0, 2 / 3.0] * 12, (12, 2))
    df_expected_variable = pd.DataFrame(data=expected_variable_data,
                                        index=self._mock_variable_bins(),
                                        columns=expected_columns)
    self.assertTrue((df_expected_variable == df_variable).all().all())

    # Test unimplemented statistic name.
    # Fixed: the original passed the *result* of an unbound
    # Accuracy.error_report(...) call to assertRaises (not a callable) and
    # wrapped everything in `try/except Exception: pass`, which swallowed
    # any failure. Use the context-manager form on the instance instead.
    with self.assertRaises(Exception):
        accuracy.error_report(
            state_puma, 'fake_dir',
            marginal_variables=['num_people', 'num_vehicles', 'age'],
            statistic='wrong-statistic-name')