示例#1
0
class VotingEnsemble(BaseClassifier):
    def __init__(self, feature_length, num_classes, x=10):

        super().__init__(feature_length, num_classes)

        self.model = VotingClassifier(estimators=[
            ('gba',
             GradientBoostingClassifier(n_estimators=100,
                                        learning_rate=1.0,
                                        max_depth=1,
                                        random_state=0)),
            ('knn',
             KNeighborsClassifier(metric='manhattan',
                                  weights='distance',
                                  n_neighbors=3)),
            ('Nc', NearestCentroid(metric='manhattan')), ('nvb', GaussianNB()),
            ('rf', RandomForestClassifier(n_estimators=10,
                                          criterion='entropy')),
            ('svmlin', svm.SVC(kernel='linear')),
            ('svmpol', svm.SVC(kernel='poly')),
            ('svmrbf', svm.SVC(kernel='rbf'))
        ],
                                      voting='hard')

        self.num_classes = num_classes

    def train(self, features, labels):
        """
        Using a set of features and labels, trains the classifier and returns the training accuracy.
        :param features: An MxN matrix of features to use in prediction
        :param labels: An M row list of labels to train to predict
        :return: Prediction accuracy, as a float between 0 and 1
        """

        labels = self.labels_to_categorical(labels)
        self.model.fit(features, labels)
        accuracy = self.model.score(features, labels)
        return accuracy

    # make sure you save model using the same library as we used in machine learning price-predictor

    def predict(self, features, labels):
        """
        Using a set of features and labels, predicts the labels from the features,
        and returns the accuracy of predicted vs actual labels.
        :param features: An MxN matrix of features to use in prediction
        :param labels: An M row list of labels to test prediction accuracy on
        :return: Prediction accuracy, as a float between 0 and 1
        """
        label_train = self.labels_to_categorical(labels)
        labels = self.model.predict(features)
        accuracy = self.model.score(features, label_train)
        return accuracy

    def get_prediction(self, features):
        return self.model.predict(features)

    def reset(self):
        """
        Resets the trained weights / parameters to initial state
        :return:
        """

        pass

    def labels_to_categorical(self, labels):
        _, IDs = unique(labels, return_inverse=True)
        return IDs
    ('l2', pipe2),
    ('l3', pipe3),
    ('l4', pipe4),
    ('l5', pipe5),
],
                       n_jobs=4)
cls.fit(cars_train_X, cars_train_y)

# uncomment the 3 lines below if needed to see the accuracy and std-dev of the training set
# scores = cross_val_score(cls, cars_train_X, cars_train_y, cv=5, verbose=True)
# print(scores)
# print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
# this reaches about 30% acc

# create the predictions and dump to a file for plotting the heatmap
y_pred = cls.predict(cars_test_X)

with open('5subset_linearsvm_voting.sav', 'wb') as f:
    pkl.dump((y_pred, cars_test_y), f)

y_true = cars_test_y
preds = {}
for i in range(y_true.shape[0]):
    if y_true[i] == y_pred[i]:
        if y_true[i] not in preds:
            preds[y_true[i]] = 1
        else:
            preds[y_true[i]] += 1
for y in y_true:
    if y not in preds:
        preds[y] = 0