"""KernelLogisticRegression.py

Kernelized logistic regression: inputs are mapped through a pairwise kernel
matrix against the training samples, and scikit-learn's standard
LogisticRegression solvers are run in that kernel space.
"""
import numbers
import warnings

import numpy as np

# NOTE: several of these imports are private or legacy scikit-learn paths
# (sklearn.svm.base, sklearn.externals.joblib, sklearn.linear_model.logistic,
# sklearn.utils.fixes._joblib_parallel_args); they match scikit-learn ~0.20
# and were moved or removed in later releases.
from sklearn.svm.base import _fit_liblinear
from sklearn.utils.extmath import softmax, row_norms
from sklearn.utils.validation import check_X_y
from sklearn.exceptions import NotFittedError
from sklearn.utils.multiclass import check_classification_targets
from sklearn.externals.joblib import Parallel, delayed, effective_n_jobs
from sklearn.utils.fixes import _joblib_parallel_args
from sklearn.linear_model.logistic import (_check_solver, _check_multi_class,
                                           logistic_regression_path)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics.pairwise import pairwise_kernels


class KernelLogisticRegression(LogisticRegression):
    """Logistic regression fit on a kernel matrix.

    The estimator precomputes ``K(X, X_fit_)`` with ``pairwise_kernels`` and
    delegates the optimization to the parent class's solvers, so the learned
    weights live in the span of the training samples.
    """

    def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0,
                 kernel='linear', gamma=None, degree=3, coef0=1,
                 kernel_params=None, fit_intercept=True, intercept_scaling=1,
                 class_weight=None, random_state=None, solver='warn',
                 max_iter=100, multi_class='warn', verbose=0,
                 warm_start=False, n_jobs=None):
        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.kernel_params = kernel_params
        super().__init__(penalty=penalty, dual=dual, tol=tol, C=C,
                         fit_intercept=fit_intercept,
                         intercept_scaling=intercept_scaling,
                         class_weight=class_weight,
                         random_state=random_state, solver=solver,
                         max_iter=max_iter, multi_class=multi_class,
                         verbose=verbose, warm_start=warm_start,
                         n_jobs=n_jobs)

    def _get_kernel(self, X, Y=None):
        # A callable kernel supplies its own keyword arguments; the built-in
        # metrics take gamma/degree/coef0, and filter_params=True drops the
        # ones the chosen metric does not accept.
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma,
                      "degree": self.degree,
                      "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self.kernel,
                                filter_params=True, **params)
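
    # Illustration (a sketch for orientation, not used by the estimator):
    # with the 'rbf' metric, pairwise_kernels returns an
    # (n_samples_X, n_samples_Y) Gram matrix with entries
    #     K[i, j] = exp(-gamma * ||X[i] - Y[j]||^2)
    # so fit() below trains an ordinary linear model whose features are
    # similarities to each training point.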

    def fit(self, X, y, sample_weight=None):
        if not isinstance(self.C, numbers.Number) or self.C < 0:
            raise ValueError("Penalty term must be positive; got (C=%r)"
                             % self.C)
        if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0:
            raise ValueError("Maximum number of iterations must be positive;"
                             " got (max_iter=%r)" % self.max_iter)
        if not isinstance(self.tol, numbers.Number) or self.tol < 0:
            raise ValueError("Tolerance for stopping criteria must be "
                             "positive; got (tol=%r)" % self.tol)

        solver = _check_solver(self.solver, self.penalty, self.dual)

        if solver in ['newton-cg']:
            _dtype = [np.float64, np.float32]
        else:
            _dtype = np.float64

        X, y = check_X_y(X, y, accept_sparse='csr', dtype=_dtype, order="C",
                         accept_large_sparse=solver != 'liblinear')
        # Keep the training samples: prediction needs K(X_new, X_fit_).
        self.X_fit_ = X
        X_k = self._get_kernel(X)

        check_classification_targets(y)
        self.classes_ = np.unique(y)
        n_samples, n_features = X_k.shape

        multi_class = _check_multi_class(self.multi_class, solver,
                                         len(self.classes_))

        if solver == 'liblinear':
            if effective_n_jobs(self.n_jobs) != 1:
                warnings.warn("'n_jobs' > 1 does not have any effect when"
                              " 'solver' is set to 'liblinear'. Got 'n_jobs'"
                              " = {}.".format(effective_n_jobs(self.n_jobs)))
            self.coef_, self.intercept_, n_iter_ = _fit_liblinear(
                X_k, y, self.C, self.fit_intercept, self.intercept_scaling,
                self.class_weight, self.penalty, self.dual, self.verbose,
                self.max_iter, self.tol, self.random_state,
                sample_weight=sample_weight)
            self.n_iter_ = np.array([n_iter_])
            return self

        if solver in ['sag', 'saga']:
            max_squared_sum = row_norms(X_k, squared=True).max()
        else:
            max_squared_sum = None

        n_classes = len(self.classes_)
        classes_ = self.classes_
        if n_classes < 2:
            raise ValueError("This solver needs samples of at least 2 classes"
                             " in the data, but the data contains only one"
                             " class: %r" % classes_[0])

        if len(self.classes_) == 2:
            n_classes = 1
            classes_ = classes_[1:]

        if self.warm_start:
            warm_start_coef = getattr(self, 'coef_', None)
        else:
            warm_start_coef = None
        if warm_start_coef is not None and self.fit_intercept:
            warm_start_coef = np.append(warm_start_coef,
                                        self.intercept_[:, np.newaxis],
                                        axis=1)

        self.coef_ = list()
        self.intercept_ = np.zeros(n_classes)

        # Hack so that we iterate only once for the multinomial case.
        if multi_class == 'multinomial':
            classes_ = [None]
            warm_start_coef = [warm_start_coef]
        if warm_start_coef is None:
            warm_start_coef = [None] * n_classes

        path_func = delayed(logistic_regression_path)

        # The SAG solver releases the GIL so it's more efficient to use
        # threads for this solver.
        if solver in ['sag', 'saga']:
            prefer = 'threads'
        else:
            prefer = 'processes'
        fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
                               **_joblib_parallel_args(prefer=prefer))(
            path_func(X_k, y, pos_class=class_, Cs=[self.C],
                      fit_intercept=self.fit_intercept, tol=self.tol,
                      verbose=self.verbose, solver=solver,
                      multi_class=multi_class, max_iter=self.max_iter,
                      class_weight=self.class_weight, check_input=False,
                      random_state=self.random_state, coef=warm_start_coef_,
                      penalty=self.penalty,
                      max_squared_sum=max_squared_sum,
                      sample_weight=sample_weight)
            for class_, warm_start_coef_ in zip(classes_, warm_start_coef))

        fold_coefs_, _, n_iter_ = zip(*fold_coefs_)
        self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0]

        if multi_class == 'multinomial':
            self.coef_ = fold_coefs_[0][0]
        else:
            self.coef_ = np.asarray(fold_coefs_)
            self.coef_ = self.coef_.reshape(n_classes, n_features +
                                            int(self.fit_intercept))

        if self.fit_intercept:
            self.intercept_ = self.coef_[:, -1]
            self.coef_ = self.coef_[:, :-1]

        return self
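
    # Shape note: because the solvers ran on the kernel matrix, coef_ comes
    # out as (n_classes, n_samples_fit), one weight per training sample in
    # the style of dual coefficients, rather than (n_classes, n_features).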

    def predict_proba(self, X):
        """Probability estimates.

        The returned estimates for all classes are ordered by the
        label of classes.

        For a multiclass problem, if multi_class is set to "multinomial",
        the softmax function is used to find the predicted probability of
        each class. Otherwise a one-vs-rest approach is used, i.e. the
        probability of each class is computed assuming it to be positive,
        using the logistic function, and these values are normalized
        across all the classes.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        T : array-like, shape = [n_samples, n_classes]
            Returns the probability of the sample for each class in the
            model, where classes are ordered as they are in
            ``self.classes_``.
        """
        if not hasattr(self, "coef_"):
            raise NotFittedError("Call fit before prediction")

        # Map the new samples into kernel space against the training set.
        X_k = self._get_kernel(X, self.X_fit_)

        ovr = (self.multi_class in ["ovr", "warn"] or
               (self.multi_class == 'auto' and (self.classes_.size <= 2 or
                                                self.solver == 'liblinear')))
        if ovr:
            return super(LogisticRegression, self)._predict_proba_lr(X_k)
        else:
            decision = self.decision_function(X_k)
            if decision.ndim == 1:
                # Workaround for multi_class="multinomial" and binary
                # outcomes, which requires softmax prediction with only a
                # 1D decision.
                decision_2d = np.c_[-decision, decision]
            else:
                decision_2d = decision
            return softmax(decision_2d, copy=False)
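
    # Sanity check for the 1-D branch above: softmax([-d, d]) reduces to
    # 1 / (1 + exp(-2 * d)), so the binary "multinomial" probability is a
    # logistic function applied to twice the decision value.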

    def predict(self, X):
        """Predict class labels for samples in X.

        Parameters
        ----------
        X : array_like or sparse matrix, shape (n_samples, n_features)
            Samples.

        Returns
        -------
        C : array, shape [n_samples]
            Predicted class label per sample.
        """
        # Mirror predict_proba's fitted check before touching X_fit_.
        if not hasattr(self, "coef_"):
            raise NotFittedError("Call fit before prediction")
        X_k = self._get_kernel(X, self.X_fit_)
        scores = self.decision_function(X_k)
        if scores.ndim == 1:
            indices = (scores > 0).astype(int)
        else:
            indices = scores.argmax(axis=1)
        return self.classes_[indices]
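

# A minimal usage sketch, assuming a scikit-learn version around 0.20 where
# the private imports at the top of this file still resolve. The dataset and
# hyperparameters below are illustrative, not from the original module.
if __name__ == "__main__":
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    X, y = make_classification(n_samples=200, n_features=5, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    clf = KernelLogisticRegression(kernel='rbf', gamma=0.5, C=1.0,
                                   solver='lbfgs', multi_class='auto')
    clf.fit(X_train, y_train)

    # coef_ is dual-shaped: one weight per training sample.
    print("coef_ shape:", clf.coef_.shape)
    print("test accuracy:", clf.score(X_test, y_test))
    print("probabilities:", clf.predict_proba(X_test[:3]))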