feature, value = ChooseBestFeature(dataArray, resultList, maxDataCount, minErrorReduce) if feature == None: #Leaf node = Node(0, value, None, None) else: subDataArray1, subResultList1, subDataArray2, subResultList2 = SplitDataset( dataArray, resultList, feature, value) leftTree = CreateModelTree(subDataArray1, subResultList1, maxDataCount, minErrorReduce) rightTree = CreateModelTree(subDataArray2, subResultList2, maxDataCount, minErrorReduce) node = Node(feature, value, leftTree, rightTree) return node def ForecastByModelTree(tree, inData): if (tree.left == None and tree.right == None): return (numpy.mat(inData) * tree.value).tolist()[0][0] if (inData[tree.feature] > tree.value): return ForecastByModelTree(tree.left, inData) else: return ForecastByModelTree(tree.right, inData) if __name__ == '__main__': dataArray, resultList = LoadDataAndLabel("Dataset2.txt") tree = CreateModelTree(dataArray, resultList, 4, 0.1) #ShowTree(tree) print(ForecastByModelTree(tree, [0.530897])) print(ForecastByModelTree(tree, [0.993349]))
#Calculate the b in two conditions b1 = b - Ei - labelMatrix[i] * ( newAlphaI - alphas[i]) * dataMatrix[ i, :] * dataMatrix[i, :].T - labelMatrix[j] * ( newAlphaJ - alphas[j] ) * dataMatrix[i, :] * dataMatrix[j, :].T b2 = b - Ej - labelMatrix[i] * ( newAlphaI - alphas[i]) * dataMatrix[ i, :] * dataMatrix[j, :].T - labelMatrix[j] * ( newAlphaJ - alphas[j] ) * dataMatrix[j, :] * dataMatrix[j, :].T #Update the alpha[i], alpha[j] and b alphas[i] = newAlphaI alphas[j] = newAlphaJ if (0 < alphas[i] and alphas[i] < border): b = b1 elif (0 < alphas[j] and alphas[j] < border): b = b2 else: b = (b1 + b2) / 2.0 iter = iter + 1 return alphas, b


if __name__ == '__main__':
    # Demo driver: load the sample set, run smoSimple on it and print the
    # two values it returns, one per line.
    dataArray, labelList = LoadDataAndLabel("Dataset.txt")
    # NOTE(review): 0.6 / 0.001 / 100 are passed positionally; their meaning
    # depends on smoSimple's signature, which is not visible here.
    alphas, b = smoSimple(dataArray, labelList, 0.6, 0.001, 100)
    print(alphas, b, sep="\n")
errorCount += 1 print("The error rate:%f" % (errorCount / float(numToTest))) #绘图 def Plot(dataMatrix, labelList): fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(dataMatrix[:, 1],dataMatrix[:, 2]) ax.scatter(dataMatrix[:, 1],dataMatrix[:, 2], 15.0 * array(labelList), 15.0 * array(labelList)) plt.show() def ClassifyPerson(normMatrix, ranges, minValues, labelList): resultList = ['Not in all','In small doses','In large doses'] percentTats = float(input("Percentage of time spent playing video game?")) ffMiles = float(input("Frequent flier miles earned per year?")) iceCream = float(input("Liters of ice cream consumed per year?")) inArray = [ffMiles,percentTats,iceCream] classifierResult = Classify0((inArray)/ranges,normMatrix,labelList,3) print("You will probably like this person:",resultList[int(classifierResult)-1]) if __name__ == '__main__': dataArray,labelList = LoadDataAndLabel('DatingTestSet2.txt') dataMatrix = array(dataArray) normMatrix,ranges,minValues = AutoNorm(dataMatrix) DataClassTest(normMatrix, ranges, minValues, labelList) ClassifyPerson(normMatrix, ranges, minValues, labelList) Plot(dataMatrix, labelList)
x1 = min(X) y1 = w[0] + x1 * w[1] x2 = max(X) y2 = w[0] + x2 * w[1] ax.plot([x1, x2], [y1, y2], 'r-') sortedXIndex = numpy.argsort(X) for index in range(len(X) - 1): ax.plot([X[sortedXIndex[index]], X[sortedXIndex[index + 1]]], [ LWLRPredY[sortedXIndex[index]], LWLRPredY[sortedXIndex[index + 1]] ], 'g--') matplotlib.pyplot.show() if __name__ == '__main__': dataArray, labelList = LoadDataAndLabel('Dataset1.txt') w = StandRegres(dataArray, labelList) StandPredY = [numpy.mat(dataArray[i]) * w for i in range(len(dataArray))] LWLRPredY = [ LocalWeightLinearRegress(data, dataArray, labelList, 0.01).tolist()[0][0] for data in dataArray ] w1 = RidgeRegres(dataArray, labelList) RidgePredY = [(w1[0] + dataArray[i][1] * w1[1]).tolist()[0][0] for i in range(len(dataArray))] x = [i[1] for i in dataArray] Plot(x, labelList, w.flatten().A[0], LWLRPredY) print(RegError(labelList, StandPredY)) print(RegError(labelList, RidgePredY))