/
election_regression.py
executable file
·106 lines (89 loc) · 3.09 KB
/
election_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys,json
import json
from jubatus.regression.client import Regression
from jubatus.regression.types import *
from jubatus.classifier.types import LabeledDatum
from jubatus.common import Datum
from sklearn.cross_validation import train_test_split
import numpy
from itertools import izip
def parse_args(args=None):
    """Parse the command-line options of this script.

    Args:
        args: Optional list of argument strings to parse. When ``None``
            (the default) ``OptionParser`` falls back to ``sys.argv[1:]``.

    Returns:
        The ``(options, remainder)`` pair returned by
        ``OptionParser.parse_args``. ``options`` carries ``server_ip``
        (string, default ``'127.0.0.1'``), ``server_port`` (int, default
        ``9199``) and ``name`` (string, default ``'tutorial'``);
        ``remainder`` is the list of leftover positional arguments.
    """
    from optparse import OptionParser

    p = OptionParser()
    p.add_option('-s', '--server_ip', action='store',
                 dest='server_ip', type='string', default='127.0.0.1')
    # The default is an int so that it matches type='int' directly instead
    # of relying on optparse converting a string default.
    p.add_option('-p', '--server_port', action='store',
                 dest='server_port', type='int', default=9199)
    p.add_option('-n', '--name', action='store',
                 dest='name', type='string', default='tutorial')
    return p.parse_args(args)
def get_most_likely(estm):
    """Pick the highest-scoring entry from an estimation result.

    Args:
        estm: Iterable of objects exposing ``label`` and ``score``
            attributes.

    Returns:
        A dict with two integer keys: ``0`` maps to the label of the entry
        with the largest score and ``1`` maps to that score. The first entry
        wins on ties. For an empty ``estm`` the placeholder
        ``{0: '', 1: 0}`` is returned.
    """
    result = {0: '', 1: 0}
    best_score = None
    for res in estm:
        # Strict '>' keeps the earliest entry when several share the top score.
        if best_score is None or res.score > best_score:
            best_score = res.score
            result[0] = res.label
            result[1] = res.score
    return result
def cross_validation_python(path='election_data_regression.json'):
    """Load the tab-separated data file into NumPy arrays.

    Every non-empty line of the file is expected to look like
    ``label<TAB>data``. Only the first tab splits the line, so the data part
    is kept verbatim as a string (the script later feeds it to
    ``json.loads``).

    Args:
        path: File to read. Defaults to ``'election_data_regression.json'``
            in the current working directory.

    Returns:
        A two-element list ``[train_data, train_label]``. ``train_data`` is
        a string array of shape ``(n, 1)`` holding the data part of each
        line; ``train_label`` is a 1-D string array with the matching
        labels. Both are empty arrays when the file has no data lines.

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError: If a non-empty line contains no tab separator.
    """
    labels = []
    rows = []
    with open(path) as data_file:
        for line in data_file:
            # Strip only the line terminator; slicing off the last character
            # would corrupt a final line that has no trailing newline.
            line = line.rstrip('\r\n')
            if not line:
                continue
            label, dat = line.split('\t', 1)
            labels.append(label)
            # One single-column row per sample, so callers can always read
            # the payload as row[0] regardless of how many lines there are.
            rows.append([dat])
    # Build the arrays once instead of re-stacking them for every line.
    train_data = numpy.array(rows)
    train_label = numpy.array(labels)
    return [train_data, train_label]
def main():
    """Train a Jubatus regression model on the data file and evaluate it.

    Steps: connect to the server given on the command line, load the data
    file, split it into train and test parts with ``train_test_split``,
    train on each training sample, save and reload the model under the id
    ``"tutorial_regression"``, then estimate every test sample and print
    the OK/NG tally.
    """
    options, _ = parse_args()
    # NOTE(review): the parsed --name option is not used; the client is
    # created with an empty name. Presumably intended for a standalone
    # server -- confirm before wiring options.name in.
    client = Regression(options.server_ip, options.server_port, '')
    print(client.get_config())
    print(client.get_status())

    train_list = cross_validation_python()
    data_train, data_test, label_train, label_test = train_test_split(
        train_list[0], train_list[1])

    for label, dat in izip(label_train, data_train):
        print(dat[0])
        data_dict = json.loads(dat[0])
        datum = Datum(data_dict)
        client.train([[float(label), datum]])

    print(client.get_status())
    print(client.save("tutorial_regression"))
    print(client.load("tutorial_regression"))
    print(client.get_config())

    count_ok = 0
    count_ng = 0
    for label, dat in izip(label_test, data_test):
        data_dict = json.loads(dat[0])
        datum = Datum(data_dict)
        ans = client.estimate([datum])
        print(ans)
        # Skip samples without an estimate (None or an empty list) so that
        # neither ans[0] nor an unset result is touched below.
        if not ans:
            continue
        # NOTE(review): exact float equality between the label and the
        # estimate; a regression output will rarely match exactly --
        # confirm whether a tolerance was intended.
        if float(label) == ans[0]:
            result = "OK"
            count_ok += 1
        else:
            result = "NG"
            count_ng += 1
        print(result + "," + str(label) + ", " + str(ans[0]))
    print("===================")
    print("OK: {0}".format(count_ok))
    print("NG: {0}".format(count_ng))


if __name__ == '__main__':
    main()