Пример #1
0
    def test_normalize_column(self):
        """Check the values ``normalize_data`` produces for col3, col6 and col7.

        ``self.data_list`` is run through ``normalize_data`` and rows 0-9 of
        each of the three columns are compared (with ``assertAlmostEqual``)
        against fixed expected values, one column at a time.
        """
        # Expected value per row index (0-9), grouped by column.
        expected_by_column = (
            ("col3", (0.6363636364, 0.0, 0.2727272727, 0.1818181818, 0.6363636364,
                      0.4545454545, 0.4545454545, 1.0, 0.6363636364, 0.6363636364)),
            ("col6", (0.25, 0.25, 0.625, 0.0, 0.75,
                      0.375, 1.0, 0.625, 0.375, 1.0)),
            ("col7", (1.0, 0.8888888889, 0.7777777778, 0.6666666667, 0.5555555556,
                      0.4444444444, 0.3333333333, 0.2222222222, 0.1111111111, 0.0)),
        )

        result = data_normalizer().normalize_data(self.data_list)

        for column, expected_values in expected_by_column:
            for row, expected in enumerate(expected_values):
                self.assertAlmostEqual(expected, result[row][column])
Пример #2
0
    def test_get_column_min_value(self):
        """Check ``get_column_min`` against known minimums in ``self.data_list``."""
        normalizer = data_normalizer()

        # (column name, expected minimum) pairs, checked in this order.
        for column, expected_min in (("col1", 1), ("col3", 12)):
            actual_min = normalizer.get_column_min(column, self.data_list)
            self.assertEqual(expected_min, actual_min)
Пример #3
0
    def test_get_column_max_value(self):
        """Check ``get_column_max`` against known maximums in ``self.data_list``."""
        normalizer = data_normalizer()

        # (column name, expected maximum) pairs, checked in this order.
        for column, expected_max in (("col7", 10), ("col2", 80)):
            actual_max = normalizer.get_column_max(column, self.data_list)
            self.assertEqual(expected_max, actual_max)
Пример #4
0
    def test_normalize_with_min_max_file(self):
        """Check ``normalize_data_with_min_max_file`` output for col1 and col7.

        ``self.data_list`` is normalized using ``self.csv_min_max_file``; every
        row 0-9 of col1 and rows 0, 4 and 9 of col7 are then compared against
        fixed expected values.
        """
        col1_expected = (
            0.0909090909, 0.1818181818, 0.2727272727, 0.3636363636, 0.4545454545,
            0.5454545455, 0.6363636364, 0.7272727273, 0.8181818182, 0.9090909091,
        )
        # (row index, column, expected value): all of col1 first, then the
        # spot-checked col7 rows.
        checks = [(row, "col1", value) for row, value in enumerate(col1_expected)]
        checks += [
            (0, "col7", 1.0),
            (4, "col7", 0.5714285714),
            (9, "col7", 0.0),
        ]

        result = data_normalizer().normalize_data_with_min_max_file(
            self.data_list, self.csv_min_max_file
        )

        for row, column, expected in checks:
            self.assertAlmostEqual(expected, result[row][column])
Пример #5
0
normalized_data_file = "../resources/2016_examples_normalized.csv"

# TODO: regenerate this min/max file from the current data. It was built only from data up to 2014, so it may
# be missing new minimum or maximum values that showed up in 2015 or 2016.
min_max_file = "../resources/data/all_examples_with_dvoa_no_teams_normalized_min_max.csv"

# Read every row of the raw data file into memory; DictReader yields one mapping per CSV row.
data_list = []
with open(raw_data_file, "r") as data_file:
    reader = csv.DictReader(data_file)
    data_list.extend(reader)

# Normalize against the min/max values stored in min_max_file. The alternative of churning through all the data
# with normalizer.normalize_data(data_list) is intentionally not used here.
normalizer = data_normalizer()
normalized_data = normalizer.normalize_data_with_min_max_file(data_list, min_max_file)

# The regular example file carries season, week, home and away columns purely for readability. They are not
# inputs or outputs, so they are dropped from the key order used for the normalized (input/output) data.
# NOTE(review): key_order is whatever object ordered_example_keys returns, not an explicit copy, so these
# removals may also change the list held by the example itself -- confirm that is intended.
key_order = example_creator.season_examples[0].ordered_example_keys
for readability_only_key in ("HOMEteam", "AWAYteam", "Season", "Week"):
    key_order.remove(readability_only_key)

# Write the header row of the normalized output file using the trimmed key order.
writer = nfl_example_io()
writer.create_header(key_order, normalized_data_file)

for i in range(len(normalized_data)):