Example #1
def __init__(self, params):
    super(%CLASS%, self).__init__(params)
    # create one data input per LinearSVR hyper-parameter
    tmp = LinearSVR()
    params = tmp.get_params()
    for key in params:
        self.create_new_input(type_="data", label=key, widget_name="std line edit m", widget_pos="besides", pos=-1)
    del tmp
Example #2
def __init__(self, params):
    super(LinearSVRGetParams_NodeInstance, self).__init__(params)
    # create one data output per LinearSVR hyper-parameter, plus one for the whole dict
    tmp = LinearSVR()
    params = tmp.get_params()
    for key in params:
        self.create_new_output(type_="data", label=key, pos=-1)
    del tmp
    self.create_new_output(type_="data", label="param dict", pos=-1)
Example #3
# assumes numpy (np), LinearSVR and a dict `coeffs_state = {'min': [], 'max': []}`
# defined in the enclosing scope (see the usage sketch below)
class LinearSVRPermuteCoef:
    def __init__(self, **kwargs):
        self.model = LinearSVR(**kwargs)

    def fit(self, X, y):
        self.model.fit(X, y)

        self.coef_ = self.model.coef_
        self.intercept_ = self.model.intercept_

        def add_coef(arr, fn):
            arr.append(fn(self.coef_))

        # record the largest and smallest coefficient of every fit
        add_coef(coeffs_state['max'], np.max)
        add_coef(coeffs_state['min'], np.min)

        return self

    def get_params(self, deep=True):
        return self.model.get_params(deep)

    def set_params(self, **kwargs):
        self.model.set_params(**kwargs)
        return self

    def predict(self, X):
        return self.model.predict(X)

    def score(self, X, y, sample_weight=None):
        if sample_weight is not None:
            return self.model.score(X, y, sample_weight)
        else:
            return self.model.score(X, y)

    @staticmethod
    def permute_min_coefs():
        return coeffs_state['min']

    @staticmethod
    def permute_max_coefs():
        return coeffs_state['max']

    @staticmethod
    def reset_perm_coefs():
        coeffs_state['min'] = []
        coeffs_state['max'] = []
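A minimal usage sketch under those assumptions (synthetic data and arbitrary hyper-parameters, made up for illustration):

import numpy as np
from sklearn.svm import LinearSVR

coeffs_state = {'min': [], 'max': []}   # the shared state the class records into

rng = np.random.RandomState(0)
X, y = rng.rand(100, 5), rng.rand(100)

model = LinearSVRPermuteCoef(C=1.0, max_iter=10000)
model.fit(X, y)
print(model.permute_max_coefs())  # one entry (the max coefficient) per fit() call
LinearSVRPermuteCoef.reset_perm_coefs()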
Example #5
    else:
        test_set.append([
            predictiveAttributeNotDegree[i][11],
            predictiveAttributeNotDegree[i][13]
        ])
        test_result.append([predictiveAttributeNotDegree[i][2]])

svm_reg = LinearSVR(epsilon=1.0, max_iter=10000000)
train_result = np.array(train_result)
svm_reg.fit(train_set, train_result.ravel())
print(svm_reg.score(test_set, test_result))
prediction = []
# predict one row at a time (see the vectorized equivalent after this example)
for item in test_set:
    items = [[item[0], item[1]]]
    prediction.append(svm_reg.predict(items))
pred = np.zeros(len(prediction))
predi = np.array(prediction)
for i in range(len(prediction)):
    pred[i] = predi[i][0]

print(("MSE: {}".format(mean_squared_error(pred, test_result))))
print("Params: ", svm_reg.get_params())
test_set = np.array(test_set)
test_result = np.array(test_result)
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt

# note: plot_decision_regions is designed for classifiers; with a regressor such as
# LinearSVR the plotted "regions" are raw continuous predictions and may not render as intended
plot_decision_regions(test_set, test_result.reshape(-1), clf=svm_reg, legend=2)
plt.title('SVM Decision Region Boundary', size=16)
plt.show()
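The per-row prediction loop above can be collapsed into a single vectorized call, since predict accepts a 2-D array; a minimal equivalent sketch reusing the names above:

pred = svm_reg.predict(test_set)  # 1-D array with the same values as the loop-built pred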
Example #6
        test_result_tot.append([predictiveAttributeDegree[i][2]])
train_percent = (len(predictiveAttributeNotDegree)/100)*80
count = 0
for i in range(len(predictiveAttributeNotDegree)):
    if count < train_percent:
        count = count + 1
        train_set_tot.append([predictiveAttributeNotDegree[i][0], predictiveAttributeNotDegree[i][1], predictiveAttributeNotDegree[i][6],
                          predictiveAttributeNotDegree[i][7], predictiveAttributeNotDegree[i][9], predictiveAttributeNotDegree[i][10],
                          predictiveAttributeNotDegree[i][11], predictiveAttributeNotDegree[i][12],predictiveAttributeNotDegree[i][17],
                          predictiveAttributeNotDegree[i][18]])
        train_result_tot.append([predictiveAttributeNotDegree[i][2]])
    else:
        test_set_tot.append([predictiveAttributeNotDegree[i][0], predictiveAttributeNotDegree[i][1], predictiveAttributeNotDegree[i][6],
                          predictiveAttributeNotDegree[i][7], predictiveAttributeNotDegree[i][9], predictiveAttributeNotDegree[i][10],
                          predictiveAttributeNotDegree[i][11], predictiveAttributeNotDegree[i][12],predictiveAttributeNotDegree[i][17],
                          predictiveAttributeNotDegree[i][18]])
        test_result_tot.append([predictiveAttributeNotDegree[i][2]])

train_result_tot = np.array(train_result_tot)
svm_reg_tot.fit(train_set_tot, train_result_tot.ravel())

print("----ALL ATTRIBUTE: score: ", svm_reg_tot.score(test_set_tot, test_result_tot))
#              0. matr 1.cf  6.tipoCds  7.coorte  9.annodiploma 10.votodip 11.codschool 12.tipoMat  17.mot_sta 18.sta
newStudent = [[2933, 2928, 1, 2015, 2015, 100, 200, 9, 3, 10]]
real_value = [30]
predicted = svm_reg_tot.predict(newStudent)

print("----ALL ATTRIBUTE: Predicted: ", predicted)
print("----ALL ATTRIBUTE: MSE: ", mean_squared_error(real_value, svm_reg_tot.predict(newStudent)))
print("----ALL ATTRIBUTE: Params: ", svm_reg_tot.get_params())
Example #7
import pickle

import numpy as np
from sklearn.svm import LinearSVR as SVR

from dim_utils import *

# Train a linear SVR

npzfile = np.load('large_data.npz')
model_name = 'model.pickle'

X = npzfile['X']
y = npzfile['y']

# we already normalize these values in gen.py
# X /= X.max(axis=0, keepdims=True)

svr = SVR(C=1)
svr.fit(X, y)

with open(model_name, 'wb') as pickle_file:
    pickle.dump(svr, pickle_file)

print(svr.get_params())  # to save the parameters

# svr.set_params() to restore the parameters

# predict
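A minimal sketch of the save/restore the comments above describe; get_params() captures the hyper-parameters only, while the fitted coefficients live in the pickle:

saved_params = svr.get_params()
svr_restored = SVR(**saved_params)       # fresh, unfitted model with identical settings
svr_restored.set_params(**saved_params)  # equivalent in-place restore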
Example #8
class TextRegressor:
    param_defaults = {'min_df': 1, 'c_ngmin': 1, 'c_ngmax': 1,
                      'w_ngmax': 1, 'w_ngmin': 1, 'lowercase': 'word',
                      'alpha': 1.0, 'C': 1.0, 'mix': 1.0}
    def __init__(self, regressor='ridge', vectorizer='tf-idf'):
        if regressor == 'ridge':
            from sklearn.linear_model import Ridge
            self.reg = Ridge()
        elif regressor == 'SVR':
            from sklearn.svm import SVR
            self.reg = SVR()
        elif regressor == 'linearsvr':
            from sklearn.svm import LinearSVR
            self.reg = LinearSVR()
        if vectorizer == 'tf-idf':
            from sklearn.feature_extraction.text import TfidfVectorizer
            self.vec = TfidfVectorizer()
        self.vec_params_default = self.vec.get_params()
        self.reg_params_default = self.reg.get_params()
        self._reset()

    def _reset(self):
        self.par = dict(self.param_defaults)
        # copy the defaults so the update() calls in set_params cannot mutate them
        self.vec_params = dict(self.vec_params_default)
        self.vec.set_params(**self.vec_params)
        self.reg_params = dict(self.reg_params_default)
        self.reg.set_params(**self.reg_params)

    def set_params(self, **params):
        self._reset()
        self.par.update(params)
        ngram_analyzer = DocAnalyzer(
                    lowercase=self.par.get('lowercase'),
                    c_ngmin=self.par.get('c_ngmin'),
                    c_ngmax=self.par.get('c_ngmax'),
                    w_ngmin=self.par.get('w_ngmin'),
                    w_ngmax=self.par.get('w_ngmax'))
        self.vec_params.update(
            {k:self.par[k] for k in self.par.keys() & self.vec_params.keys()})
        self.vec.set_params(**self.vec_params)
        self.vec.set_params(analyzer=ngram_analyzer)
        self.reg_params.update(
            {k:self.par[k] for k in self.par.keys() & self.reg_params.keys()})
        self.reg.set_params(**self.reg_params)

    def get_params(self):
        return self.par

    def fit(self, text, outcome):
        num = None
        if len(text) == 2:
            text, num = text
        x = self.vec.fit_transform(text)
        if num is not None:
            x = hstack((x, self.par['mix'] * num), format='csr')
        self.reg.fit(x, outcome)

    def predict(self, text,
                gold=None, gold_rank=None, rank_dir=-1, return_score=False):
        num = None
        if len(text) == 2:
            text, num = text
        x = self.vec.transform(text)
        if num is not None:
            x = hstack((x, self.par['mix'] * num), format='csr')
        pred = self.reg.predict(x)
        if return_score:
            return pred, self._score(gold, pred, gold_rank, rank_dir)
        else:
            return pred

    def _score(self, gold, pred, gold_rank=None, rank_dir=-1,
            verbose=False):
        r2 = r2_score(gold, pred)
        rmse = np.sqrt(mean_squared_error(gold, pred))
        if gold_rank is None:
            gold_rank = rankdata(rank_dir * gold, method='ordinal')
        pred_rank = rankdata(rank_dir * pred, method='ordinal')
        corr, _ = pearsonr(gold, pred)
        rank_corr, _ = pearsonr(gold_rank, pred_rank)
        if verbose:
            fmt = ("{}: n={}, min={:.4f}, max={:.4f}, mean={:.4f}, "
                   "var={:.4f}, skew={:.4f}, kurtosis={:.4f}")
            gold_dsc = describe(gold)
            pred_dsc = describe(pred)
            print(fmt.format('gold',
                gold_dsc[0], *gold_dsc[1], *gold_dsc[2:]))
            print(fmt.format('pred',
                pred_dsc[0], *pred_dsc[1], *pred_dsc[2:]))
        return {'r2': r2, 'rmse': rmse, 'rank_corr': rank_corr, 'corr': corr}

    def score(self, text, gold, gold_rank=None, rank_dir=-1,
            verbose=False):
        pred = self.predict(text)
        return self._score(gold, pred, gold_rank, rank_dir,
                verbose=verbose)
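set_params() above routes each option to whichever component declares it via a dict-view intersection; a standalone sketch of that idiom with toy dicts (values are hypothetical):

par = {'C': 10.0, 'alpha': 0.5, 'min_df': 2}
reg_params = {'C': 1.0, 'epsilon': 0.0}
reg_params.update({k: par[k] for k in par.keys() & reg_params.keys()})
print(reg_params)  # {'C': 10.0, 'epsilon': 0.0} -- only the shared key 'C' changed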
Example #9
data, nrows, ncols = readDataSet("YearPredictionMSD20.txt")
X = data[:, 1:91]
y = data[:, 0]
pca = PCA(n_components=10)
pca.fit(X)
print(pca.explained_variance_ratio_)
print(pca.components_)
# print(pca.explained_variance_)
# print(pca.mean_)
print(pca.n_components_)
# print(pca.noise_variance_)
print(pca.components_[1])
rowFeatureVector = pca.components_
# manual projection onto the principal components (note: no mean-centering here)
X = np.dot(rowFeatureVector, X.transpose())
X = X.transpose()
print(len(X))
print(X)
clf = LinearSVR(C=1.0, epsilon=0, verbose=1, max_iter=1000)
clf.fit(X, y)
print(clf.predict(X))
print(y)
print(clf.score(X, y))
print(clf.get_params(deep=True))
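For reference, PCA's own transform first centers the data with pca.mean_, which the manual dot product above skips; a sketch starting again from the raw features:

X_orig = data[:, 1:91]                 # the unprojected features (X was overwritten above)
X_reduced = pca.transform(X_orig)      # (X_orig - pca.mean_) @ pca.components_.T
print(np.allclose(X_reduced, np.dot(X_orig - pca.mean_, pca.components_.T)))  # True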
Example #10
class Baseline:
    def __init__(self, city, dest_name):
        self.city = city
        self.dest_name = dest_name
        print('Baseline implementation for {:s} : {:s}'.format(
            self.city, self.dest_name))
        dest_to_idx = {
            'bofa': 0,
            'church': 1,
            'gas_station': 3,  # note: duplicates the 'high_school' index (possibly meant to be 2)
            'high_school': 3,
            'mcdonalds': 4
        }
        self.idx = dest_to_idx[self.dest_name]
        self.base_dir = osp.join('../data/dataset', city)
        self.train_label_filename = osp.join(self.base_dir, 'distance',
                                             'train_labels.h5')
        self.train_im_list_filename = osp.join(self.base_dir, 'distance',
                                               'train_im_list.txt')
        self.test_label_filename = osp.join(self.base_dir, 'distance',
                                            'test_labels.h5')
        self.test_im_list_filename = osp.join(self.base_dir, 'distance',
                                              'test_im_list.txt')
        self.svr = LinearSVR(verbose=1,
                             epsilon=0,
                             dual=False,
                             tol=1e-3,
                             max_iter=50000,
                             loss='squared_epsilon_insensitive')
        self.scaler = StandardScaler(copy=False)
        self.model_filename = osp.join(self.base_dir, 'distance',
                                       '{:s}.pkl'.format(self.dest_name))

    def collect_train_data(self):
        with open(self.train_im_list_filename, 'r') as train_f_im:
            train_im_names = [l.rstrip() for l in train_f_im]

        print('Loading train data...')
        with h5py.File('../data/dataset/train_feats1.mat', 'r') as f:
            self.train_X = np.asarray(f['train_features'], dtype=np.float32).T

        with h5py.File(self.train_label_filename, 'r') as train_f_label:
            self.train_y = train_f_label['label'][:,
                                                  self.idx].astype(np.float32)

        # select cities and remove rogue labels
        # idx = [i for i,n in enumerate(train_im_names) if ((('boston' in n)) and self.train_y[i] < 1e3)]
        idx = [
            i for i, n in enumerate(train_im_names) if self.train_y[i] < 1e3
        ]

        self.train_X = self.train_X[idx, :]
        self.train_y = self.train_y[idx]

        assert (self.train_y.shape[0] == self.train_X.shape[0])
        print('Done, using {:d} images for training'.format(
            self.train_X.shape[0]))

    def train(self, C=1.0):
        print('Scaling...')
        self.train_X = self.scaler.fit_transform(self.train_X)
        print('Training with C = {:f}'.format(C))
        # round-trip the full parameter dict to change one value
        # (see the shorter set_params sketch after this example)
        p = self.svr.get_params()
        p['C'] = C
        self.svr.set_params(**p)
        self.svr.fit(self.train_X, self.train_y)

    def save_predictions(self):
        with h5py.File('../data/dataset/test_feats.mat', 'r') as f:
            print('Loading feats...')
            self.test_X = np.asarray(f['test_features'], dtype=np.float32).T

        with open('../data/dataset/test_filenames.txt', 'r') as f:
            im_names = [n.rstrip() for n in f]
        keys = [get_key(im_name) for im_name in im_names]

        assert (len(im_names) == self.test_X.shape[0])

        print('Loading models...')
        d = joblib.load(self.model_filename)
        self.svr = d['svr']
        self.scaler = d['scaler']

        print('Scaling...')
        self.test_X = self.scaler.transform(self.test_X)
        print('Predicting...')
        preds = self.svr.predict(self.test_X)
        print('Done!')
        pred_dict = {key: pred for (key, pred) in zip(keys, preds)}
        fn = '../data/dataset/test_preds_{:s}.pk'.format(self.dest_name)
        with open(fn, 'wb') as f:  # binary mode required for pickle
            pickle.dump(pred_dict, f)
        print('Saved', fn)

    def save_current_model(self):
        joblib.dump({
            'svr': self.svr,
            'scaler': self.scaler
        }, self.model_filename)
        print(self.model_filename, 'saved')
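train() above round-trips the whole parameter dict just to change C; set_params() also accepts individual keyword arguments, so the same update can be a one-liner. A minimal standalone sketch:

from sklearn.svm import LinearSVR

svr = LinearSVR()
svr.set_params(C=10.0)  # same effect as p = svr.get_params(); p['C'] = 10.0; svr.set_params(**p)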
Example #11
math.sqrt(mse_rf)  # RMSE of the earlier random-forest model (notebook-style echo)

# visualize the actual vs. predicted series (using matplotlib)

plt.figure()
plt.plot(np.arange(98), pred1, label='predict')
plt.plot(np.arange(98), test_y, label='actual count')
plt.legend()

# try out a support vector machine

from sklearn.svm import LinearSVR

# search for the best hyper-parameters
svm_model = LinearSVR(random_state=486)
svm_model.get_params().keys()  # the SVM's tunable parameter names

param_grid_svm = {
    'C': [1, 10, 100, 1000, 10000, 20000, 50000, 100000],
    'epsilon': [0.1, 0.5, 1, 1.25, 1.5, 1.75, 2]
}

grid_svm = GridSearchCV(svm_model, param_grid=param_grid_svm, cv=5)
grid_svm.fit(train_x, train_y)
print(grid_svm.best_params_)  # print the best parameters

svm_model = LinearSVR(C=50000, random_state=486, epsilon=1.75)
svm_model.fit(train_x, train_y)
pred_svm = svm_model.predict(test_x)

plt.figure()
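Instead of re-instantiating LinearSVR with the values the grid search found, the best parameters can also be applied in place; a sketch reusing the names above:

svm_model.set_params(**grid_svm.best_params_)  # e.g. applies the C and epsilon from the search
svm_model.fit(train_x, train_y)
pred_svm = svm_model.predict(test_x)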