-
Notifications
You must be signed in to change notification settings - Fork 1
/
adaboost_classifiers.py
179 lines (130 loc) · 5.98 KB
/
adaboost_classifiers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
from models_deeplearn import SDAE
from theano import function, config, shared
import theano.tensor as T
import numpy as np
class AdaClassifier(object):
    """Common interface for weak learners plugged into the AdaBoost driver.

    Concrete subclasses (SDAE, XGBoost, SVM, random-forest wrappers) expose a
    uniform train / get_labels / get_test_results contract. The base
    implementations are deliberate no-ops that return None.
    """
    def train(self,tr_all,v_all,weights):
        """Fit on (ids, x, y) train/validation tuples using per-sample weights."""
    def get_labels(self):
        """Return (ids, predicted, actual) labels from the last train() call."""
    def get_test_results(self,ts_data):
        """Return (ids, outputs) for an (ids, features) test tuple."""
class UseSDAE(AdaClassifier):
    # AdaBoost-compatible wrapper around the Stacked Denoising Autoencoder
    # (SDAE from models_deeplearn), trained with Theano shared variables.
    # NOTE(review): indentation was reconstructed from a flattened source dump;
    # nesting of the validation/early-stopping section should be confirmed
    # against the original repository.
    def __init__(self,param):
        """Build and compile the SDAE from a config dict.

        Expected keys: batch_size, iterations, in_size, out_size, hid_sizes,
        learning_rate, pre_epochs, fine_epochs, lam (presumably a
        regularization weight -- confirm against SDAE), act (activation).
        """
        self.batch_size = param['batch_size']
        self.iterations = param['iterations']
        self.in_size = param['in_size']
        self.out_size = param['out_size']
        self.hid_sizes = param['hid_sizes']
        self.learning_rate = param['learning_rate']
        self.pre_epochs = param['pre_epochs']
        self.finetune_epochs = param['fine_epochs']
        self.lam = param['lam']
        self.act = param['act']
        self.sdae = SDAE(self.in_size,self.out_size,self.hid_sizes,self.batch_size,self.learning_rate,self.lam,self.act,self.iterations)
        # process() presumably builds the Theano computation graph -- see SDAE.
        self.sdae.process()
        # Caches for (ids, predicted, actual) labels collected after training.
        self.theano_tr_ids, self.tr_pred, self.tr_act = [],[],[]
    def train(self,tr_all,v_all,weights):
        """Pre-train then fine-tune the SDAE; cache training-set predictions.

        tr_all / v_all are (ids, x, y) tuples; weights are per-sample boosting
        weights passed into the fine-tuning cost.
        """
        from math import ceil
        tr_ids,tr_x,tr_y = tr_all
        v_ids,v_x,v_y = v_all
        # Boosting weights uploaded once as a Theano shared variable.
        weights_shr = shared(value=np.asarray(weights,dtype=config.floatX),borrow=True)
        def get_shared_data(data_xy):
            # Wrap a (x, y) pair as Theano shared tensors; labels cast to int32
            # because Theano indexing/loss ops require integer targets.
            data_x,data_y = data_xy
            shared_x = shared(value=np.asarray(data_x,dtype=config.floatX),borrow=True)
            shared_y = shared(value=np.asarray(data_y,dtype=config.floatX),borrow=True)
            return shared_x,T.cast(shared_y,'int32')
        train = get_shared_data((tr_x,tr_y))
        valid = get_shared_data((v_x,v_y))
        # ceil so a final partial batch is still visited.
        n_train_batches = ceil(train[0].get_value(borrow=True).shape[0] / self.batch_size)
        n_valid_batches = ceil(valid[0].get_value(borrow=True).shape[0] / self.batch_size)
        # Compiled Theano functions, each taking a batch index.
        pretrain_func = self.sdae.pre_train(train[0],train[1])
        finetune_func = self.sdae.fine_tune(train[0],train[1],weights_shr)
        # Ids stored as floatX then cast, since Theano shareds are float-typed.
        my_valid_id_tensor = shared(value=np.asarray(v_ids,dtype=config.floatX),borrow=True)
        my_valid_id_int_tensor = T.cast(my_valid_id_tensor,'int32')
        validate_func = self.sdae.validate(valid[0],valid[1],my_valid_id_int_tensor)
        my_train_id_tensor = shared(value=np.asarray(tr_ids,dtype=config.floatX),borrow=True)
        my_train_id_int_tensor = T.cast(my_train_id_tensor,'int32')
        tr_validate_func = self.sdae.validate(train[0],train[1],my_train_id_int_tensor)
        # ---- Unsupervised layer-wise pre-training ----
        for epoch in range(self.pre_epochs):
            pre_train_cost = []
            for b in range(n_train_batches):
                pre_train_cost.append(pretrain_func(b))
            print('Pretrain cost ','(epoch ', epoch,'): ',np.mean(pre_train_cost))
        # ---- Supervised fine-tuning with early stopping ----
        min_valid_err = np.inf
        for epoch in range(self.finetune_epochs):
            from random import shuffle
            finetune_cost = []
            # Visit batches in a fresh random order each epoch.
            b_idx =[i for i in range(0,n_train_batches)]
            shuffle(b_idx)
            for b in b_idx:
                cost = finetune_func(b)
                finetune_cost.append(cost)
            if epoch%10==0:
                print('Finetune cost: ','(epoch ', epoch,'): ',np.mean(finetune_cost))
            valid_cost = []
            for b in range(n_valid_batches):
                ids,errs,pred_y,act_y = validate_func(b)
                valid_cost.append(errs)
            curr_valid_err = np.mean(valid_cost)
            print('Validation error: ',np.mean(valid_cost))
            # Early stop when validation error worsens beyond a 1% tolerance.
            if curr_valid_err*0.99>min_valid_err:
                break
            elif curr_valid_err<min_valid_err:
                min_valid_err = curr_valid_err
        # Collect final training-set predictions for the boosting driver.
        for b in range(n_train_batches):
            t_ids,t_errs,t_pred_y,t_act_y = tr_validate_func(b)
            self.theano_tr_ids.extend(t_ids)
            # argmax over class-probability rows -> hard labels.
            self.tr_pred.extend([np.argmax(arr) for arr in t_pred_y])
            self.tr_act.extend([np.argmax(arr) for arr in t_act_y])
    def get_labels(self):
        """Return cached (ids, predicted, actual) from the last train() call."""
        return self.theano_tr_ids,self.tr_pred,self.tr_act
    def get_test_results(self,ts_data):
        """Score (ids, features) test data; returns (ids, per-row probabilities).

        NOTE(review): the batch count is the raw row count (no division by
        batch_size) and only probs[0] is kept per call -- this presumably means
        SDAE.test scores one row per index; confirm against SDAE.test.
        """
        ts_ids, test_x = ts_data
        test_x = shared(value=np.asarray(test_x,dtype=config.floatX),borrow=True)
        test_func = self.sdae.test(test_x)
        n_test_batches = (test_x.get_value(borrow=True).shape[0])
        test_out_probs = []
        for b in range(n_test_batches):
            cls,probs = test_func(b)
            test_out_probs.append(probs[0])
        return ts_ids,test_out_probs
from models_xgboost import XGBoost
class UseXGBoost(AdaClassifier):
    """AdaBoost-compatible wrapper delegating to the project XGBoost model."""
    def __init__(self,params):
        # Underlying booster plus caches for the last run's training labels.
        self.xgboost = XGBoost(params)
        self.tr_ids = []
        self.tr_pred = []
        self.tr_act = []
    def train(self,tr_all,v_all,weights):
        """Fit the booster; cache (ids, predicted, actual) training labels."""
        fitted = self.xgboost.train_clf(tr_all,v_all,weights)
        self.tr_ids, self.tr_pred, self.tr_act = fitted
    def get_labels(self):
        """Return cached (ids, predictions, actuals) from the last train()."""
        return self.tr_ids, self.tr_pred, self.tr_act
    def get_test_results(self,ts_data):
        """Score an (ids, features) test tuple via the wrapped booster."""
        ts_ids, test_x = ts_data
        return ts_ids, self.xgboost.test_clf(test_x)
from models_sklearn import SVM
class UseSVM(AdaClassifier):
    """AdaBoost-compatible wrapper delegating to the sklearn-based SVM model."""
    def __init__(self,params):
        # Wrapped model plus caches for the last run's training labels.
        self.svm = SVM(params)
        self.tr_ids = []
        self.tr_pred = []
        self.tr_act = []
    def train(self,tr_all,v_all,weights):
        """Fit the SVM; cache (ids, predicted, actual) training labels."""
        fitted = self.svm.train(tr_all,v_all,weights)
        self.tr_ids, self.tr_pred, self.tr_act = fitted
    def get_labels(self):
        """Return cached (ids, predictions, actuals) from the last train()."""
        return self.tr_ids, self.tr_pred, self.tr_act
    def get_test_results(self,ts_data):
        """Score an (ids, features) test tuple via the wrapped SVM."""
        ts_ids, test_x = ts_data
        return ts_ids, self.svm.test(test_x)
from models_sklearn import RandForest
class UseRF(AdaClassifier):
def __init__(self,params):
self.rf = RandForest(params)
self.tr_ids,self.tr_pred,self.tr_act = [],[],[]
def train(self,tr_all,v_all,weights):
self.tr_ids,self.tr_pred,self.tr_act = self.rf.train(tr_all,v_all,weights)
def get_labels(self):
return self.tr_ids,self.tr_pred,self.tr_act
def get_test_results(self,ts_data):
ts_ids, test_x = ts_data
return ts_ids,self.rf.test(test_x)