from numpy import shape, c_, ones, mat, zeros
import matplotlib.pyplot as plt
import logRegres as lgr  # assumed: the logRegres helper module used elsewhere in this file


def plotJwiththeta2(x, y):
    m, n = shape(x)  # m: number of training examples; n: number of features
    x = c_[ones(m), x]  # add the intercept column x0
    x = mat(x)  # convert to matrix
    y = mat(y)
    maxcycle = 90000
    theta = zeros((n + 1, 1))  # initial theta
    theta[1] = -50
    J = []
    theta1 = zeros((maxcycle, 1))
    for i in range(maxcycle):
        h = lgr.sigmoid(x * theta)
        theta1[i] = theta[1]  # copy into a preallocated array; appending theta[1] directly stores a reference, not the value
        cost = lgr.costfunction(y, h)
        J.append(cost)
        theta[1] = theta[1] + 0.003  # sweep theta1 so each recorded cost matches the theta1 it was computed with
        # print(theta[1])
    fig = plt.figure()
    plt.plot(theta1, J)
    plt.xlabel('theta1')
    plt.ylabel('J')
    plt.show()
    return
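# The plot above relies on lgr.sigmoid and lgr.costfunction, which are not shown in this
# file. A minimal sketch of what those helpers might look like, assuming y and h are
# equally shaped column matrices of labels and predicted probabilities and the cost is
# the usual average cross-entropy J(theta) = -1/m * sum(y*log(h) + (1-y)*log(1-h)):
import numpy as np


def sigmoid(inX):
    # logistic function, applied element-wise
    return 1.0 / (1.0 + np.exp(-inX))


def costfunction(y, h):
    # average cross-entropy cost over m samples
    m = np.shape(h)[0]
    return float(-np.sum(np.multiply(y, np.log(h)) + np.multiply(1 - y, np.log(1 - h))) / m)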
def stocGradAscent0(dataMatrix, classLabels):
    m, n = shape(dataMatrix)
    alpha = 0.5
    weights = ones(n)  # initialize to all ones
    weightsHistory = zeros((500 * m, n))
    for j in range(500):
        for i in range(m):
            h = logRegres.sigmoid(sum(dataMatrix[i] * weights))
            error = classLabels[i] - h
            weights = weights + alpha * error * dataMatrix[i]
            weightsHistory[j * m + i, :] = weights
    return weightsHistory
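# A short usage sketch: plot how each coefficient in weightsHistory evolves, which makes
# the oscillation of this fixed-step stochastic update visible. It assumes the
# three-column data set returned by logRegres.loadDataSet(), as used by the driver
# script at the end of this file.
import numpy as np
import matplotlib.pyplot as plt
import logRegres

dataArr, labelMat = logRegres.loadDataSet()
history = stocGradAscent0(np.array(dataArr), labelMat)

fig, axes = plt.subplots(3, 1, sharex=True)
for k, name in enumerate(['X0', 'X1', 'X2']):
    axes[k].plot(history[:, k])  # trajectory of the k-th regression coefficient
    axes[k].set_ylabel(name)
axes[-1].set_xlabel('update number')
plt.show()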
def classifyVector(inX, weights): """ Logistic回归分类函数 :param inX: :param weights: :return: """ prob = logRegres.sigmoid(sum(inX * weights)) if prob > 0.5: return 1.0 else: return 0.0
def stocGradAscent1(dataMatrix, classLabels):
    m, n = shape(dataMatrix)
    weights = ones(n)  # initialize to all ones
    weightsHistory = zeros((40 * m, n))
    for j in range(40):
        dataIndex = list(range(m))  # indices not yet visited in this pass
        for i in range(m):
            # step size decays with the pass number j and update count i,
            # but the 0.01 constant keeps it from ever reaching zero
            alpha = 4 / (1.0 + j + i) + 0.01
            # draw one of the remaining samples at random (without replacement within a pass)
            randIndex = int(random.uniform(0, len(dataIndex)))
            sample = dataIndex[randIndex]
            h = logRegres.sigmoid(sum(dataMatrix[sample] * weights))
            error = classLabels[sample] - h
            weights = weights + alpha * error * dataMatrix[sample]
            weightsHistory[j * m + i, :] = weights
            del dataIndex[randIndex]
    print(weights)
    return weightsHistory
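# The decaying step size is the main difference from stocGradAscent0. A quick check of
# the schedule alpha = 4/(1.0 + j + i) + 0.01 shows how it shrinks over the 40 passes
# while staying bounded away from zero (m = 100 is only an illustrative sample count):
m = 100
for j, i in [(0, 0), (0, 1), (1, 0), (39, m - 1)]:
    alpha = 4 / (1.0 + j + i) + 0.01
    print(j, i, round(alpha, 4))
# prints: 0 0 4.01 / 0 1 2.01 / 1 0 2.01 / 39 99 0.0388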
def stoc_grad_ascent0(data_matrix, class_label):
    """
    Stochastic gradient ascent, single pass.
    This version does not work well: it misclassifies at least a third of the data.
    :param data_matrix: sample features as a NumPy array
    :param class_label: class labels
    :return: fitted regression coefficients
    """
    m, n = data_matrix.shape
    # step size
    alpha = 0.01
    # initialize all regression coefficients to 1
    weights = np.ones(n)
    for i in range(m):
        # multiply each sample by the coefficients, sum, then pass the result through sigmoid
        h = logRegres.sigmoid(np.sum(data_matrix[i] * weights))
        # prediction error
        error = class_label[i] - h
        # correct the coefficients by the error
        weights = weights + alpha * error * data_matrix[i]
    return weights
def stoc_grad_ascent1(data_matrix, class_label, max_iter=150):
    """
    Improved stochastic gradient ascent.
    :param data_matrix: sample features as a NumPy array
    :param class_label: class labels
    :param max_iter: number of passes over the data
    :return: fitted regression coefficients
    """
    m, n = data_matrix.shape
    # initialize all regression coefficients to 1
    weights = np.ones(n)
    for j in range(max_iter):
        data_index = list(range(m))
        for i in range(m):
            # step size decays with the pass number and update count, but never reaches zero
            alpha = 4 / (1.0 + j + i) + 0.01
            # draw one of the remaining samples at random so each pass visits every sample once
            rand_index = int(np.random.uniform(0, len(data_index)))
            sample = data_index[rand_index]
            # multiply the sample by the coefficients, sum, then pass the result through sigmoid
            h = logRegres.sigmoid(np.sum(data_matrix[sample] * weights))
            # prediction error
            error = class_label[sample] - h
            # correct the coefficients by the error
            weights = weights + alpha * error * data_matrix[sample]
            del data_index[rand_index]
    return weights
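# A minimal usage sketch for the trainer above, assuming logRegres.loadDataSet and
# logRegres.plotBestFit behave as they do in the driver script at the end of this file:
import numpy as np
import logRegres

data_arr, label_mat = logRegres.loadDataSet()
weights = stoc_grad_ascent1(np.array(data_arr), label_mat, max_iter=150)
print(weights)
logRegres.plotBestFit(weights)  # draw the fitted decision boundary over the data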
y = np.array([[1, 1, 0, 1]]).T

# seed random numbers to make calculation
# deterministic (just a good practice)
np.random.seed(1)

# initialize weights randomly with mean 0
syn0 = 2 * np.random.random((3, 1)) - 1

print('X:', X)
print('y:', y)
print('syn0:', syn0)
# matrix multiplication
print('dot:', np.dot(X, syn0))
print('dot:', logRegres.sigmoid(np.dot(X, syn0)))

for iterator in range(1000):
    # forward propagation
    l0 = X
    l1 = nonlin(np.dot(l0, syn0))
    # how much did we miss?
    l1_error = y - l1
    # multiply how much we missed by the
    # slope of the sigmoid at the values in l1
    l1_delta = l1_error * nonlin(l1, True)
    # update weights
    syn0 += np.dot(l0.T, l1_delta)
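# The loop above expects an input matrix X (4 samples, 3 features) defined earlier and a
# helper nonlin that returns the sigmoid, or its derivative when the second argument is
# True and the input is already a sigmoid output. A minimal sketch of that helper:
import numpy as np


def nonlin(x, deriv=False):
    if deriv:
        # x is assumed to already be sigmoid(z), so the slope is x * (1 - x)
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))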
def classifyVector(inX, weight):
    prob = logRegres.sigmoid(sum(inX * weight))
    if prob > 0.5:
        return 1.0
    else:
        return 0.0
def classifyVector(inX, weights):
    prob = logRegres.sigmoid(np.sum(inX * weights))
    return 1.0 if prob > 0.5 else 0.0
def predict(inX, weights):
    probability = sigmoid(sum(np.array(inX) * weights))
    if probability > 0.5:
        return 1
    else:
        return 0
def classifyVector(inX, weights):
    prob = lr.sigmoid(sum(inX * weights))
    if prob > 0.5:
        return 1
    else:
        return 0
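# These classifiers feed the horse-colic experiment started by logRegres.multiTest() in
# the driver script below. A minimal sketch of how classifyVector could be scored on a
# held-out file, assuming trained weights and a tab-separated test file with the label
# in the last column (the file name is only illustrative):
import numpy as np


def error_rate(weights, test_file='horseColicTest.txt'):
    errors, total = 0, 0
    with open(test_file) as fh:
        for line in fh:
            fields = line.strip().split('\t')
            features = [float(v) for v in fields[:-1]]
            label = float(fields[-1])
            # classifyVector thresholds sigmoid(x . w) at 0.5
            if classifyVector(np.array(features), weights) != label:
                errors += 1
            total += 1
    return errors / float(total)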
# coding=utf-8
import numpy
import matplotlib.pyplot as plt

import logRegres

dataMat, labelMat = logRegres.loadDataSet()

weights = logRegres.gradAscent(dataMat, labelMat)
print(weights)
#logRegres.plotBestFit(weights.getA())

weights = logRegres.stocGradAscent0(dataMat, labelMat)
print('-- stochastic gradient ascent')
#logRegres.plotBestFit(weights)

weights = logRegres.stocGradAscent1(dataMat, labelMat)
print('-- improved stochastic gradient ascent')
logRegres.plotBestFit(weights)

fig = plt.figure()
ax = fig.add_subplot(111)
x = numpy.arange(-8.0, 8.0, 0.2)
print(logRegres.sigmoid(-0.3))
# apply an offset: not every data set is split at x = 0
y = [logRegres.sigmoid(xi - 0.1) for xi in x]
ax.plot(x, y)
plt.xlabel('X1')
plt.ylabel('X2')
#plt.show()

print('-- predict sick horses')
logRegres.multiTest()