/
run.py
137 lines (111 loc) · 3.36 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import argparse
from datetime import datetime
from genericpath import exists
import os
import sys
from kaggle_donors_choose_2014.model_runner import ModelRunner, models
import subprocess
# Short git revision used to tag output files and score-log entries.
# check_output returns bytes (on Python 3); decode before slicing so the
# value formats as plain text instead of a literal "b'...'" repr.
git_hash = subprocess.check_output(
    ['git', 'rev-parse', 'HEAD']).decode('ascii').strip()[:10]
def save_stats(filename, stats):
    """Write each stats entry to *filename*, one entry per line."""
    with open(filename, 'wt') as f:
        f.writelines('{}\n'.format(entry) for entry in stats)
def save_predictions(filename, predictions):
    """Write a Kaggle submission CSV to *filename*.

    predictions: DataFrame whose first column is projectid and second is
    is_exciting (the row index is ignored).
    """
    with open(filename, 'wt') as f:
        f.write('projectid,is_exciting\n')
        # itertuples yields (index, projectid, is_exciting); skip the index.
        # No need to materialize the iterator into a list first.
        for row in predictions.itertuples():
            f.write("%s,%s\n" % (
                row[1],
                row[2]
            ))
# http://stackoverflow.com/a/3637103
class DefaultHelpParser(argparse.ArgumentParser):
    """ArgumentParser that prints the full help text (not just usage)
    when argument parsing fails, then exits with status 2."""

    def error(self, message):
        # Emit a blank line and the error banner, then the complete help.
        sys.stdout.write(os.linesep + 'ERROR: {0}{1}{1}'.format(message, os.linesep))
        self.print_help()
        sys.exit(2)
def parse_args():
    """Build the CLI parser and return the parsed arguments."""
    parser = DefaultHelpParser(
        description='Model runner for KDD 2014 kaggle competition.')
    parser.add_argument(
        '--model',
        required=True,
        dest='model_name',
        action='store',
        choices=models.keys(),
        help='the name of the model to run'
    )
    parser.add_argument(
        '--submit',
        dest='submit',
        action='store_true',
        help='toggle on to run for a submission'
    )
    return parser.parse_args()
def last_few(pscore):
    """Append this run's score to output/last-100.txt and print the 10
    most recent scores.

    Each log line is tab-separated: timestamp, git hash, model name, score.
    NOTE(review): reads module globals `git_hash` and `model_name`; the
    latter is assigned in the __main__ block before this is called.
    """
    last_n = []
    lastfile = 'output/last-100.txt'
    # Use os.path.exists rather than the private `genericpath` module,
    # which is an undocumented implementation detail of the stdlib.
    if os.path.exists(lastfile):
        with open(lastfile, 'r') as f:
            for line in f:
                dt, ghash, name, score = [i.strip() for i in line.split('\t')]
                last_n.append([dt, ghash, name, float(score)])
    last_n.append([
        datetime.utcnow().strftime('%Y%m%d%H%M%S'),
        git_hash,
        model_name,
        pscore
    ])
    # Keep only the 100 most recent entries.
    last_n = last_n[-100:]
    with open(lastfile, 'w') as f:
        for row in last_n:
            f.write('{}\n'.format('\t'.join([str(i) for i in row])))
        f.flush()
    print('')
    print('last 10 scores:')
    for line in last_n[-10:]:
        print(' {} {:>15} {:>25} {:>20}'.format(
            line[0],
            line[1],
            line[2],
            '{:.5f}'.format(line[3])
        ))
if __name__ == '__main__':
    args = parse_args()
    runner = ModelRunner()
    # `model_name` stays a module-level name: last_few() reads it as a global.
    model_name = args.model_name

    # argparse `choices` already restricts --model, but re-check here with a
    # friendlier message that lists every valid model name.
    if model_name not in models:
        raise Exception('\n`{}` is not a valid model\n\nmodels:\n {}'.format(
            model_name,
            '\n '.join(models.keys())
        ))

    runner.init(model_name)

    if not args.submit:
        # Evaluation run: train, report stats, and record the AUC score.
        predictions = runner.train_model()
        stats_path = 'output/{}-{}-{}.stats.txt'.format(
            datetime.utcnow().strftime('%Y%m%d%H%M%S'),
            model_name,
            git_hash
        )
        stats = runner.stats(predictions)
        print('')
        for stat_line in stats:
            print(stat_line)
        save_stats(stats_path, stats)
        last_few(runner.auc_roc_score(predictions))
    else:
        # Submission run: score the test set and dump a Kaggle CSV.
        predictions = runner.test_model()
        csv_path = 'output/{}-{}-{}.csv'.format(
            datetime.utcnow().strftime('%Y%m%d%H%M%S'),
            model_name,
            git_hash
        )
        save_predictions(csv_path, predictions)