-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.py
executable file
·125 lines (92 loc) · 2.64 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import csv
import pdb
import functions
# Shared encoding for two-valued "Yes"/"No" columns: +1 for yes, -1 for no.
yes_no = {"Yes": 1, "No": -1}
def game_data_format():
    """Describe the column layout and value encodings of the game data set.

    Returns:
        A dict with two keys:
        "columns" lists the column names in file order;
        "translations" maps each column name either to the string
        "continuous" or to a dict from cell text to its encoded value.
    """
    # "Action" is one-hot encoded in the order the actions are listed here.
    actions = ("Wander", "Hide", "Attack", "Run")
    action_codes = {
        name: [1 if slot == position else 0 for slot in range(len(actions))]
        for position, name in enumerate(actions)
    }

    # "Health" is not a plain one-hot table ("Good" is all -1), so it is
    # spelled out literally.
    health_codes = {
        "Poor": [0, 0, 1],
        "Fair": [0, 1, 0],
        "Good": [-1, -1, -1],
    }

    return {
        "translations": {
            "Health": health_codes,
            "Armor": yes_no,
            "Weapon": yes_no,
            "Enemies": "continuous",
            "Action": action_codes,
        },
        "columns": ["Health", "Armor", "Weapon", "Enemies", "Action"],
    }
def iris_data_format():
    """Describe the column layout and value encodings of the iris data set.

    Returns:
        A dict with two keys:
        "columns" lists the column names in file order;
        "translations" maps each measurement column to the string
        "continuous" and "Iris Type" to a one-hot list per species name.
    """
    measurements = ("Sepal Length", "Sepal Width", "Petal Length", "Petal Width")
    species = ("Setosa", "Versicolor", "Virginica")

    translations = {name: "continuous" for name in measurements}
    # One-hot vectors follow the order in which the species are listed above.
    translations["Iris Type"] = {
        label: [int(slot == position) for slot in range(len(species))]
        for position, label in enumerate(species)
    }

    return {
        "translations": translations,
        "columns": [*measurements, "Iris Type"],
    }
def check_digit(value):
    """Report whether ``value`` can be converted to a number with ``float``.

    Args:
        value: The value to test, normally the text of one CSV cell.

    Returns:
        True if ``float(value)`` succeeds, False otherwise.
    """
    # ``str.isdigit`` is deliberately not used as a shortcut: it accepts
    # characters such as the superscript "²" that ``float`` rejects, which
    # would make callers crash when they convert the value afterwards.
    try:
        float(value)
    except (TypeError, ValueError):
        # ValueError: text that is not a number; TypeError: e.g. None.
        return False
    return True
def standardize(reader, class_count):
    """Rescale the numeric attribute columns of a table to standard scores.

    Args:
        reader: Iterable of rows (for example a ``csv.reader``); each row is
            a mutable list of cell values.
        class_count: Number of trailing columns that are not attributes and
            are therefore left untouched.

    Returns:
        The list of non-blank rows. In every attribute column whose value in
        the first row is numeric, each cell is replaced by the float
        ``(value - mean) / stddev``, with ``mean`` and ``stddev`` supplied
        by ``functions.mean`` and ``functions.standard_deviation``. All
        other cells are returned as read. An empty input yields ``[]``.
    """
    # csv.reader yields [] for a blank line; such rows have no cells to
    # index, so they are dropped instead of raising IndexError below.
    rows = [row for row in reader if row]
    if not rows:
        return rows

    attributes = len(rows[0]) - class_count
    for i in range(attributes):
        # Only the first row is inspected to decide whether a column is numeric.
        if not check_digit(rows[0][i]):
            continue

        values = [row[i] for row in rows]
        mean = functions.mean(values)
        stddev = functions.standard_deviation(values, mean)
        for row in rows:
            # A constant column has zero spread; map it to 0.0 rather than
            # dividing by zero.
            row[i] = (float(row[i]) - mean) / stddev if stddev else 0.0
    return rows
def parse(filename, translations):
    """Load a CSV file and encode every row as a flat list of values.

    Args:
        filename: Path of the comma-separated file to read.
        translations: Format description with two keys: "columns" (column
            names in file order) and "translations" (maps each column name
            either to the string "continuous" or to a dict from cell value
            to its encoded value).

    Returns:
        A list with one flat list per row. Cells of "continuous" columns
        become floats; cells of other columns are replaced by their
        translated value, and translated values that are lists are spliced
        into the row element by element.

    Raises:
        KeyError: If a cell of a translated column is missing from that
            column's translation table.
        IndexError: If a row has more cells than there are column names.
    """
    columns = translations["columns"]
    encoders = translations["translations"]

    with open(filename, newline='') as csvfile:
        # standardize() collects every row into a list, so the file can be
        # closed before the rows are encoded. The 1 marks the last column as
        # a non-attribute column that must not be rescaled.
        rows = standardize(csv.reader(csvfile, delimiter=','), 1)

    data = []
    for row in rows:
        encoded = []
        for i, cell in enumerate(row):
            translation = encoders[columns[i]]
            # Compare by value: ``is not`` tests object identity, which for
            # strings only works by accident of interning.
            if translation != "continuous":
                value = translation[cell]
            else:
                value = float(cell)

            # List encodings (e.g. one-hot vectors) are flattened into the row.
            if isinstance(value, list):
                encoded.extend(value)
            else:
                encoded.append(value)
        data.append(encoded)
    return data
# data = parse('game-data.csv', game_data_format())
# pdb.set_trace()