def RecommendByDT(train_data, train_data_y, test_data, test_data_y, recommendNum=5):
    """Recommend labels using a grid-searched decision tree classifier.

    A ``DecisionTreeClassifier`` is tuned with ``GridSearchCV`` over
    ``min_samples_leaf`` and ``max_depth``, fitted on the training set, and
    its ``predict_proba`` output for ``test_data`` is turned into a
    recommendation list by ``DataProcessUtils``.

    Args:
        train_data: Training features passed to ``fit``.
        train_data_y: Training targets; must expose ``shape[1]``, which is
            used as the number of candidate labels (numbered from 1).
        test_data: Features to predict on.
        test_data_y: Ground-truth answers for ``test_data``; returned as-is.
        recommendNum: Forwarded to ``DataProcessUtils.getListFromProbable``
            (presumably the number of labels to recommend per sample --
            confirm against that helper).

    Returns:
        A two-element list ``[recommendList, answerList]`` where
        ``answerList`` is ``test_data_y`` itself.
    """
    from sklearn.model_selection import GridSearchCV
    from sklearn.tree import DecisionTreeClassifier

    # Hyper-parameters to tune.
    search_space = [{
        'min_samples_leaf': [2, 4, 8, 16, 32, 64],
        'max_depth': [2, 4, 6, 8],
    }]

    tuned_tree = GridSearchCV(DecisionTreeClassifier(), param_grid=search_space, n_jobs=-1)
    tuned_tree.fit(train_data, train_data_y)
    raw_proba = tuned_tree.predict_proba(test_data)
    print(tuned_tree.best_params_)

    # Convert the prediction result into a data array.
    score_table = DataProcessUtils.convertMultilabelProbaToDataArray(raw_proba)
    print(score_table)

    # Candidate labels are numbered 1..(number of target columns).
    label_ids = range(1, train_data_y.shape[1] + 1)
    recommended = DataProcessUtils.getListFromProbable(score_table, label_ids, recommendNum)

    # Debug output: scores, ground truth, recommendations, answers.
    for item in (score_table, test_data_y, recommended, test_data_y):
        print(item)

    return [recommended, test_data_y]
def RecommendByRF(train_data, train_data_y, test_data, test_data_y, recommendNum=5):
    """Multi-label classification with a random forest.

    Fits a ``RandomForestClassifier`` with fixed settings (50 estimators,
    depth 5, all cores) and converts its ``predict_proba`` output for
    ``test_data`` into a recommendation list via ``DataProcessUtils``.

    Args:
        train_data: Training features passed to ``fit``.
        train_data_y: Training targets; must expose ``shape[1]``, which is
            used as the number of candidate labels (numbered from 1).
        test_data: Features to predict on.
        test_data_y: Ground-truth answers for ``test_data``; returned as-is.
        recommendNum: Forwarded to ``DataProcessUtils.getListFromProbable``
            (presumably the number of labels to recommend per sample --
            confirm against that helper).

    Returns:
        A two-element list ``[recommendList, answerList]`` where
        ``answerList`` is ``test_data_y`` itself.
    """
    # NOTE: grid searches over the number of weak learners (n_estimators)
    # and over the tree parameters (max_depth, min_samples_split) were tried
    # here with GridSearchCV and are currently disabled; the fixed settings
    # below are used instead.
    forest = RandomForestClassifier(n_estimators=50, max_depth=5, n_jobs=-1)
    forest.fit(train_data, train_data_y)

    # Convert the prediction result into a data array.
    score_table = DataProcessUtils.convertMultilabelProbaToDataArray(
        forest.predict_proba(test_data)
    )
    print(score_table)

    # Candidate labels are numbered 1..(number of target columns).
    label_count = train_data_y.shape[1]
    picks = DataProcessUtils.getListFromProbable(
        score_table, range(1, label_count + 1), recommendNum
    )
    truth = test_data_y

    # Debug output.
    print(score_table)
    print(test_data_y)
    print(picks)
    print(truth)

    result = [picks, truth]
    return result
def RecommendByKN(train_data, train_data_y, test_data, test_data_y, recommendNum=5):
    """Recommend labels with a default-configured k-nearest-neighbours model.

    Fits ``KNeighborsClassifier()`` on the training set and converts its
    ``predict_proba`` output for ``test_data`` into a recommendation list
    via ``DataProcessUtils``.

    Args:
        train_data: Training features passed to ``fit``.
        train_data_y: Training targets; must expose ``shape[1]``, which is
            used as the number of candidate labels (numbered from 1).
        test_data: Features to predict on.
        test_data_y: Ground-truth answers for ``test_data``; returned as-is.
        recommendNum: Forwarded to ``DataProcessUtils.getListFromProbable``
            (presumably the number of labels to recommend per sample --
            confirm against that helper).

    Returns:
        A two-element list ``[recommendList, answerList]`` where
        ``answerList`` is ``test_data_y`` itself.
    """
    neighbours = KNeighborsClassifier()
    neighbours.fit(train_data, train_data_y)
    proba = neighbours.predict_proba(test_data)

    # Convert the prediction result into a data array.
    proba = DataProcessUtils.convertMultilabelProbaToDataArray(proba)
    print(proba)

    # Candidate labels are numbered 1..(number of target columns).
    candidates = range(1, train_data_y.shape[1] + 1)
    suggestions = DataProcessUtils.getListFromProbable(proba, candidates, recommendNum)
    expected = test_data_y

    # Debug output, one object per line.
    print(proba, test_data_y, suggestions, expected, sep='\n')

    return [suggestions, expected]
def RecommendByETS(train_data, train_data_y, test_data, test_data_y, recommendNum=5):
    """Multi-label classification with grid-searched extremely randomized trees.

    An ``ExtraTreesClassifier`` (250 estimators) is tuned with a 10-fold
    ``GridSearchCV`` over ``max_depth`` and ``min_samples_split``, fitted on
    the training set, and its ``predict_proba`` output for ``test_data`` is
    converted into a recommendation list via ``DataProcessUtils``.

    Args:
        train_data: Training features passed to ``fit``.
        train_data_y: Training targets; must expose ``shape[1]``, which is
            used as the number of candidate labels (numbered from 1).
        test_data: Features to predict on.
        test_data_y: Ground-truth answers for ``test_data``; returned as-is.
        recommendNum: Forwarded to ``DataProcessUtils.getListFromProbable``
            (presumably the number of labels to recommend per sample --
            confirm against that helper).

    Returns:
        A two-element list ``[recommendList, answerList]`` where
        ``answerList`` is ``test_data_y`` itself.
    """
    import inspect

    clf = ExtraTreesClassifier(n_jobs=3, n_estimators=250)
    param_test2 = {'max_depth': range(10, 40, 10), 'min_samples_split': range(15, 30, 5)}

    search_kwargs = {'estimator': clf, 'param_grid': param_test2, 'cv': 10, 'n_jobs': 2}
    # ``iid`` was deprecated in scikit-learn 0.22 and removed in 0.24, where
    # passing it raises TypeError. Only forward ``iid=False`` when the
    # installed GridSearchCV still accepts it; newer releases already behave
    # as ``iid=False``.
    if 'iid' in inspect.signature(GridSearchCV).parameters:
        search_kwargs['iid'] = False
    clf = GridSearchCV(**search_kwargs)

    clf.fit(train_data, train_data_y)
    predictions = clf.predict_proba(test_data)

    # Convert the prediction result into a data array.
    predictions = DataProcessUtils.convertMultilabelProbaToDataArray(predictions)
    print(predictions)

    # Candidate labels are numbered 1..(number of target columns).
    recommendList = DataProcessUtils.getListFromProbable(
        predictions, range(1, train_data_y.shape[1] + 1), recommendNum
    )
    answerList = test_data_y

    # Debug output kept as in the original flow.
    print(predictions)
    print(test_data_y)
    print(recommendList)
    print(answerList)

    return [recommendList, answerList]