def test_normalize_column(self):
    # Runs normalize_data over the fixture rows and compares col3, col6 and
    # col7 of every row (indices 0-9) against hand-computed values.
    scaled_rows = data_normalizer().normalize_data(self.data_list)

    # One entry per column; the tuple position is the row index. Columns and
    # rows are checked in exactly this order, so the first mismatch reported
    # is the earliest one in the table.
    expected_by_column = (
        ("col3", (
            0.6363636364, 0.0, 0.2727272727, 0.1818181818, 0.6363636364,
            0.4545454545, 0.4545454545, 1.0, 0.6363636364, 0.6363636364,
        )),
        ("col6", (
            0.25, 0.25, 0.625, 0.0, 0.75,
            0.375, 1.0, 0.625, 0.375, 1.0,
        )),
        ("col7", (
            1.0, 0.8888888889, 0.7777777778, 0.6666666667, 0.5555555556,
            0.4444444444, 0.3333333333, 0.2222222222, 0.1111111111, 0.0,
        )),
    )

    for column, expected_values in expected_by_column:
        for row_index, expected in enumerate(expected_values):
            self.assertAlmostEqual(expected, scaled_rows[row_index][column])
def test_get_column_min_value(self):
    # get_column_min is expected to report 1 for col1 and 12 for col3 when
    # given the fixture rows.
    normalizer = data_normalizer()
    expected_minimums = (("col1", 1), ("col3", 12))

    for column, expected_min in expected_minimums:
        found_min = normalizer.get_column_min(column, self.data_list)
        self.assertEqual(expected_min, found_min)
def test_get_column_max_value(self):
    # get_column_max is expected to report 10 for col7 and 80 for col2 when
    # given the fixture rows.
    normalizer = data_normalizer()
    expected_maximums = (("col7", 10), ("col2", 80))

    for column, expected_max in expected_maximums:
        found_max = normalizer.get_column_max(column, self.data_list)
        self.assertEqual(expected_max, found_max)
def test_normalize_with_min_max_file(self):
    # Normalizes the fixture rows using the bounds supplied through
    # self.csv_min_max_file and checks the resulting col1 and col7 values.
    scaled_rows = data_normalizer().normalize_data_with_min_max_file(
        self.data_list, self.csv_min_max_file
    )

    # col1 is verified for every row; the tuple position is the row index.
    col1_expected = (
        0.0909090909, 0.1818181818, 0.2727272727, 0.3636363636, 0.4545454545,
        0.5454545455, 0.6363636364, 0.7272727273, 0.8181818182, 0.9090909091,
    )
    for row_index, expected in enumerate(col1_expected):
        self.assertAlmostEqual(expected, scaled_rows[row_index]["col1"])

    # col7 is only spot-checked at rows 0, 4 and 9.
    col7_spot_checks = ((0, 1.0), (4, 0.5714285714), (9, 0.0))
    for row_index, expected in col7_spot_checks:
        self.assertAlmostEqual(expected, scaled_rows[row_index]["col7"])
normalized_data_file = "../resources/2016_examples_normalized.csv" # TODO: need to recreate this file using existing data so as to not miss any new max or min values that may # have come in 2015 or 2016, since it was created only using data up to 2014 min_max_file = "../resources/data/all_examples_with_dvoa_no_teams_normalized_min_max.csv" data_list = [] with open(raw_data_file, "r") as data_file: reader = csv.DictReader(data_file) for line in reader: data_list.append(line) # this will churn through all the data and create a normalized data set # normalized_data = normalizer.normalize_data(data_list) normalizer = data_normalizer() normalized_data = normalizer.normalize_data_with_min_max_file(data_list, min_max_file) # the normal example file uses season, week, home, away as column headers but those are only for readability. They # are not needed for any inputs or outputs, so ignoring them here when creating the normalized data which is explicitly # used for input/output. key_order = example_creator.season_examples[0].ordered_example_keys key_order.remove("HOMEteam") key_order.remove("AWAYteam") key_order.remove("Season") key_order.remove("Week") writer = nfl_example_io() writer.create_header(key_order, normalized_data_file) for i in range(len(normalized_data)):