Пример #1
0
class r07945013_knn(classification):
    """k-nearest-neighbours classifier built on the ``classification`` base.

    Hyper-parameters are read from ``self.param``; training features come
    from ``self.inputData['X']`` and targets from ``self.outputData['Y']``.
    Predicted class probabilities are written to ``self.result['Y']``.
    """

    def trainAlgo(self):
        """Create the estimator from ``self.param`` and fit it."""
        # Imported locally so this block does not depend on edits elsewhere.
        from sklearn.neighbors import KNeighborsClassifier

        # predictAlgo() relies on predict_proba, which only the classifier
        # offers (KNeighborsRegressor has no such method). The estimator
        # keyword is ``n_neighbors``; the misspelt ``n_neigbors`` keyword is
        # rejected by scikit-learn with a TypeError. The *param key* keeps
        # its existing spelling because it is supplied by the caller.
        self.model = KNeighborsClassifier(
            n_neighbors=self.param['n_neigbors'],
            algorithm=self.param['algorithm'],
            leaf_size=self.param['leaf_size'],
            weights=self.param['weights'],
            p=self.param['p'],
        )
        # Collapse each row of Y to the index of its largest entry
        # (assumes Y is one-hot / per-class scores -- TODO confirm).
        y = np.argmax(self.outputData['Y'], axis=1)
        self.model.fit(self.inputData['X'], y)

    def predictAlgo(self):
        """Store class probabilities for ``self.inputData['X']`` in
        ``self.result['Y']``."""
        self.result['Y'] = self.model.predict_proba(self.inputData['X'])
            np.mean(knn_cv['test_score']),
            np.mean(knn_cv['score_time'])
        ])

# Turn the accumulated cross-validation rows into a labelled table.
knn_cv_results = pd.DataFrame(knn_cv_results)
knn_cv_results.columns = ['K', 'num_folds', 'cv_train_score', 'cv_test_score', 'time']

# Keep only the 10-fold runs and chart the CV test score against K.
a = knn_cv_results.loc[knn_cv_results['num_folds'].eq(10)]
plt.plot(a['K'], a['cv_test_score'])
plt.xlabel('K')
plt.ylabel('test set prediction accuracy')
plt.title("Test Set Prediction Accuracy vs K for Leaf Classification with 10 CV folds")
plt.show()

# Fit the chosen model on the full training/dev data and predict class
# probabilities for the test set.
# A classifier is required: KNeighborsRegressor has no predict_proba, so the
# regressor version fails with AttributeError. Imported here so this section
# is self-contained.
from sklearn.neighbors import KNeighborsClassifier

best_knn_clf = KNeighborsClassifier(n_neighbors=1)
best_knn_clf.fit(x_train_dev, y_train_dev)
knn_test_pred = best_knn_clf.predict_proba(x_test)

# Format for submission: one probability column per class, preceded by the id.
# NOTE(review): assumes y_train_dev was encoded with label_enc so that the
# predict_proba column order matches label_enc.classes_ -- confirm.
sub = pd.DataFrame(knn_test_pred, columns=list(label_enc.classes_))
sub.insert(0, 'id', test_ids)
# The former bare ``sub.reset_index()`` was dropped: it returned a new frame
# that was discarded, and the CSV is written without the index anyway.
sub.to_csv('knn_submission.csv', index=False)
Пример #3
0
class KNN(object):
    """Thin wrapper around scikit-learn's k-nearest-neighbours estimators.

    ``task_type`` selects ``KNeighborsClassifier`` ("cla") or
    ``KNeighborsRegressor`` ("reg"); ``module_type`` decides the ``n_jobs``
    value handed to the estimator. The fitted estimator is exposed as
    ``self.model``.
    """

    def __init__(self, task_type="cla", module_type="performance", **params):
        """Build the underlying estimator.

        :param task_type: "cla" for classification, "reg" for regression.
        :param module_type: parallelism profile. "debug" -> ``n_jobs=1``,
            "performance" -> all cores, "balance" -> half the cores (never
            fewer than one), ``None`` -> ``n_jobs=None``. This always
            overrides any ``n_jobs`` passed through ``params``.
        :param params: optional estimator settings; recognised keys and
            their defaults are ``n_neighbors`` (5), ``weights``
            ('uniform'), ``algorithm`` ('auto'), ``leaf_size`` (30),
            ``p`` (2), ``metric`` ('minkowski') and ``metric_params``
            (None). Other keys are ignored.
        :raises AssertionError: if ``task_type`` or ``module_type`` is not
            one of the accepted values.
        """
        # NOTE: asserts are stripped under ``python -O``; they are kept so
        # the exception type seen by existing callers does not change.
        assert task_type in ["cla", "reg"]  # two task kinds
        assert module_type in ["balance", "debug", "performance",
                               None]  # three parallelism profiles (or None)
        self.module_type = module_type
        self.task_type = task_type

        if self.module_type == "debug":
            n_jobs = 1
        elif self.module_type == "performance":
            # Use every core; fall back to 1 if the count is unavailable.
            n_jobs = cpu_count() or 1
        elif self.module_type == "balance":
            # Half the cores, but never 0: joblib rejects n_jobs == 0,
            # which the plain ``cpu_count() // 2`` yields on one core.
            n_jobs = max(1, (cpu_count() or 1) // 2)
        else:
            n_jobs = None

        # weights: "uniform" (default, equal weights) or "distance"
        #   (weight by the inverse of the distance).
        # algorithm: "auto", "ball_tree" (BallTree), "kd_tree" (KDTree) or
        #   "brute" (brute-force search).
        # p: int, default 2 -- power parameter of the Minkowski metric.
        #   p=1 is manhattan_distance (l1), p=2 is euclidean_distance (l2);
        #   any other p uses minkowski_distance (l_p).
        estimator_kwargs = dict(
            n_neighbors=params.get("n_neighbors", 5),
            weights=params.get("weights", 'uniform'),
            algorithm=params.get("algorithm", 'auto'),
            leaf_size=params.get("leaf_size", 30),  # leaf size of the tree
            p=params.get("p", 2),
            metric=params.get("metric", 'minkowski'),
            metric_params=params.get("metric_params", None),
            n_jobs=n_jobs,  # number of parallel jobs
        )

        # Both estimators take the same keyword set, so only the class differs.
        if self.task_type == "cla":
            self.model = KNeighborsClassifier(**estimator_kwargs)
        else:
            self.model = KNeighborsRegressor(**estimator_kwargs)

    def fit(self, x, y=None):
        """Fit the wrapped estimator on features ``x`` and targets ``y``."""
        self.model.fit(X=x, y=y)

    def get_params(self):
        """Return the wrapped estimator's parameters (``deep=True``)."""
        return self.model.get_params(deep=True)

    def set_params(self, params):
        """Apply the ``params`` mapping to the wrapped estimator."""
        self.model.set_params(**params)

    def predict(self, x):
        """Return the wrapped estimator's predictions for ``x``."""
        return self.model.predict(X=x)

    def predict_proba(self, x):
        """Return class probabilities for ``x``.

        :raises ValueError: if this instance was built for regression.
        """
        if self.task_type != "cla":
            # The message reads "not available for regression tasks". It was
            # previously constructed but never raised, so callers silently
            # received None.
            raise ValueError("回归任务无法使用")
        return self.model.predict_proba(X=x)

    def get_score(self, x, y, sample_weight=None):
        """Return the wrapped estimator's ``score`` on ``x`` / ``y``.

        :param sample_weight: optional per-sample weights, forwarded as-is.
        """
        return self.model.score(X=x, y=y, sample_weight=sample_weight)

    def search_kneighbors(self,
                          x=None,
                          n_neighbors=None,
                          return_distance=True):
        """Look up the k nearest neighbours via the estimator's ``kneighbors``.

        All arguments are forwarded unchanged.
        """
        return self.model.kneighbors(X=x,
                                     n_neighbors=n_neighbors,
                                     return_distance=return_distance)

    def get_kneighbors_graph(self,
                             x=None,
                             n_neighbors=None,
                             mode='connectivity'):
        """Return the nearest-neighbour graph via ``kneighbors_graph``.

        :param x: query points, forwarded as ``X``.
        :param n_neighbors: number of neighbours, forwarded as-is.
        :param mode: "distance" or "connectivity".
        :return: whatever the estimator's ``kneighbors_graph`` returns.
        """
        return self.model.kneighbors_graph(X=x,
                                           n_neighbors=n_neighbors,
                                           mode=mode)