示例#1
0
文件: model.py 项目: awav/carnd-p5
class CarModel():
    def __init__(self, mode='svm'):
        if mode != 'svm' and mode != 'xgboost':
            raise ValueError('Unknown mode for CarModel')
        self._f = Features()
        self._model = None
        self._mode = mode
        self.input_shape = None
    def prepare(self, data, mode='standard'):
        self.input_shape = data.x_orig[0].shape
        features = self._f.extract(data.x_orig)
        self._f.fit_scaler(features, mode=mode)
        x = self._f.normalize(features)
        data.put_features(x)
        self._colormap = data.colormap
        return data
    def fit(self, data, random_state=11, show=False):
        if data.x is None:
            raise ValueError('Dataset does not have input values')
        x = data.x
        y = data.y_orig
        train, test = self._split_data(x, y)
        self._train(train, test, show=show)
    def predict(self, im, show=False):
        f = self._f
        im = common.cvt_color(im, color=self._colormap, src='RGB')
        pred = self._model.predict(f.normalize(f.extract(np.array([im]), show=show)))
        return pred[0]
    def _split_data(self, x, y, test_size=0.1, random_state=11):
        xtr, xt, ytr, yt = train_test_split(x, y, test_size=test_size, random_state=random_state)
        return (xtr, ytr), (xt, yt)
    def _one_hot_encode(self, y):
        width = np.unique(y).shape[0]
        height = y.shape[0]
        one = np.zeros((height, width), dtype=np.int32)
        one[range(height), y] = 1
        return one
    def _train(self, train, test, random_state=11, show=False):
        x, y = train
        xtest, ytest = test
        if self._mode == 'xgboost':
            self._model = XGBClassifier(
                              learning_rate=0.1,
                              n_estimators=150,
                              max_depth=5,
                              min_child_weight=1,
                              gamma=0,
                              subsample=0.8,
                              colsample_bytree=0.8,
                              objective='binary:logistic',
                              nthread=4,
                              scale_pos_weight=1,
                              seed=random_state)
            self._model.fit(x, y, eval_metric='auc')
        else:
            self._model = LinearSVC(random_state=random_state)
            #self._model = SVC(kernel='rbf', max_iter=25000, random_state=random_state)
            #self._model = SVC(max_iter=25000, random_state=random_state)
            self._model.fit(x, y)
        pred = self._model.predict(xtest)
        acc_msg = "Test accuracy: {0:.05f}"
        print(acc_msg.format(metrics.accuracy_score(ytest, pred)))
        if self._mode == 'xgboost':
                pred_prob = self._model.predict_proba(xtest)
                ytest_hot = self._one_hot_encode(ytest)
                auc_msg = "AUC score: {0:.05f}"
                print(auc_msg.format(metrics.roc_auc_score(ytest_hot, pred_prob)))
        if self._mode == 'xgboost' and show == True:
            importance = pd.Series(self._model.booster().get_fscore()).sort_values(ascending=False)
            importance.plot(kind='bar', title='Feature Importance')
            plt.show()
            print(importance[:10])
    def load(self, filename='data/model.p'):
        with open(filename, 'rb') as fd:
             self.__dict__ = pickle.load(fd)
    def save(self, filename='data/model.p'):
        with open(filename, 'wb') as fd:
             pickle.dump(self.__dict__, fd)