def plotJwiththeta2(x, y):
    m, n = shape(x)  # m: number of training examples; n: number of features
    x = c_[ones(m), x]  #add x0
    x = mat(x)  # to matrix
    y = mat(y)
    maxcycle = 90000
    theta = zeros((n + 1, 1))  #initial theta
    theta[1] = -50
    J = []
    theta1 = zeros((maxcycle, 1))
    for i in range(maxcycle):
        h = lgr.sigmoid(x * theta)
        theta[1] = theta[1] + 0.003
        theta1[i] = theta[1]
        # record theta[1] through the preallocated array; appending the array element
        # directly would store a reference rather than an intermediate copy of the value
        cost = lgr.costfunction(y, h)
        J.append(cost)
    #    print theta[1]

    fig = plt.figure()
    plt.plot(theta1, J)
    plt.xlabel('theta1')
    plt.ylabel('J')
    plt.show()
    return
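The lgr module referenced above is not included in this snippet. A minimal sketch of the two helpers it is assumed to provide, a sigmoid and a cross-entropy cost over NumPy arrays, could look like this:

# Hypothetical sketch of the helpers assumed above (lgr.sigmoid, lgr.costfunction).
import numpy as np

def sigmoid(z):
    # element-wise logistic function
    return 1.0 / (1.0 + np.exp(-z))

def costfunction(y, h):
    # mean cross-entropy cost for logistic regression
    y = np.asarray(y).reshape(-1, 1)
    h = np.asarray(h).reshape(-1, 1)
    return float(np.mean(-y * np.log(h) - (1 - y) * np.log(1 - h)))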
Example #2
def stocGradAscent0(dataMatrix, classLabels):
    m, n = shape(dataMatrix)
    alpha = 0.5
    weights = ones(n)  #initialize to all ones
    weightsHistory = zeros((500 * m, n))
    for j in range(500):
        for i in range(m):
            h = logRegres.sigmoid(sum(dataMatrix[i] * weights))
            error = classLabels[i] - h
            weights = weights + alpha * error * dataMatrix[i]
            weightsHistory[j * m + i, :] = weights
    return weightsHistory
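stocGradAscent0 returns the full weightsHistory rather than just the final weights so that convergence can be visualized. A minimal plotting sketch, assuming the loadDataSet helper and testSet.txt data file from the book's logRegres module:

# Hypothetical plotting sketch; loadDataSet and testSet.txt are assumptions here.
import numpy as np
import matplotlib.pyplot as plt
import logRegres

dataArr, labelMat = logRegres.loadDataSet()
history = stocGradAscent0(np.array(dataArr), labelMat)

fig, axes = plt.subplots(history.shape[1], 1, sharex=True)
for k, ax in enumerate(axes):
    ax.plot(history[:, k])  # trajectory of the k-th weight over all updates
    ax.set_ylabel('w%d' % k)
axes[-1].set_xlabel('update step')
plt.show()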
def classifyVector(inX, weights):
    """
    Logistic回归分类函数
    :param inX:
    :param weights:
    :return:
    """
    prob = logRegres.sigmoid(sum(inX * weights))
    if prob > 0.5:
        return 1.0
    else:
        return 0.0
Example #5
def stocGradAscent1(dataMatrix, classLabels):
    m, n = shape(dataMatrix)
    alpha = 0.4
    weights = ones(n)  #initialize to all ones
    weightsHistory = zeros((40 * m, n))
    for j in range(40):
        dataIndex = list(range(m))
        for i in range(m):
            # step size decays as training proceeds but never reaches 0
            alpha = 4 / (1.0 + j + i) + 0.01
            randIndex = int(random.uniform(0, len(dataIndex)))
            chosen = dataIndex[randIndex]  # pick a sample not yet used in this pass
            h = logRegres.sigmoid(sum(dataMatrix[chosen] * weights))
            error = classLabels[chosen] - h
            weights = weights + alpha * error * dataMatrix[chosen]
            weightsHistory[j * m + i, :] = weights
            del dataIndex[randIndex]
    print(weights)
    return weightsHistory
def stoc_grad_ascent0(data_matrix, class_label):
    """
    随机梯度
    这个算法不太好 至少误判了三分之一的数据
    :param data_matrix:
    :param class_label:
    :return:
    """
    m, n = data_matrix.shape
    # 步长
    alpha = 0.01
    # 初始化所有的回归系数为1
    weights = np.ones(n)
    for i in range(m):
        # 先每一个样本与对应回归系数weights相乘 再求和 最后在带入sigmoid求值
        h = logRegres.sigmoid(np.sum(data_matrix[i] * weights))
        # 绝对误差
        error = class_label[i] - h
        # 修正误差
        weights = weights + alpha * error * data_matrix[i]
    return weights
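The error-rate claim in the docstring can be checked by reclassifying the training set with the single-pass weights. A small sketch, assuming the loadDataSet helper from logRegres:

# Hypothetical check of the training error; loadDataSet is an assumption.
import numpy as np
import logRegres

data_arr, labels = logRegres.loadDataSet()
data_matrix = np.array(data_arr)
weights = stoc_grad_ascent0(data_matrix, labels)

# count how many training samples the single-pass weights get wrong
errors = sum(
    int((logRegres.sigmoid(np.sum(row * weights)) > 0.5) != bool(label))
    for row, label in zip(data_matrix, labels)
)
print('training error rate: %.2f' % (errors / float(len(labels))))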
def stoc_grad_ascent1(data_matrix, class_label, max_iter=150):
    """
    改进版随机梯度
    :param data_matrix:
    :param class_label:
    :return:
    """
    m, n = data_matrix.shape
    # 初始化所有的回归系数为1
    weights = np.ones(n)
    for j in range(max_iter):
        data_index = list(range(m))
        for i in range(m):
            # 步长
            alpha = 4 / (1.0 + j + i) + 0.01
            rand_index = int(np.random.uniform(0, len(data_index)))
            # 先每一个样本与对应回归系数weights相乘 再求和 最后在带入sigmoid求值
            h = logRegres.sigmoid(np.sum(data_matrix[rand_index] * weights))
            # 绝对误差
            error = class_label[rand_index] - h
            # 修正误差
            weights = weights + alpha * error * data_matrix[rand_index]
            del data_index[rand_index]
    return weights
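A short end-to-end sketch of how these two functions are typically used, again assuming loadDataSet from logRegres; the sample point below is made up purely for illustration:

# Hypothetical usage sketch; loadDataSet, testSet.txt and the sample point are assumptions.
import numpy as np
import logRegres

data_arr, labels = logRegres.loadDataSet()
data_matrix = np.array(data_arr)

# train with the improved stochastic gradient ascent
weights = stoc_grad_ascent1(data_matrix, labels, max_iter=150)

# classify a new point (the leading 1.0 is the constant x0 term)
new_point = np.array([1.0, 0.5, -1.2])
prob = logRegres.sigmoid(np.sum(new_point * weights))
print('predicted class:', 1.0 if prob > 0.5 else 0.0)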
Example #9
y = np.array([[1, 1, 0, 1]]).T

# seed random numbers to make calculation
# deterministic (just a good practice)
np.random.seed(1)

# initialize weights randomly with mean 0
syn0 = 2 * np.random.random((3, 1)) - 1

print('X:', X)
print('y:', y)
print('syn0:', syn0)

# matrix multiplication
print('dot:', np.dot(X, syn0))
print('dot:', logRegres.sigmoid(np.dot(X, syn0)))

for iterator in range(1000):
    # forward propagation
    l0 = X
    l1 = nonlin(np.dot(l0, syn0))

    # how much did we miss?
    l1_error = y - l1

    # multiply how much we missed by the
    # slope of the sigmoid at the values in l1
    l1_delta = l1_error * nonlin(l1, True)

    # update weights
    syn0 += np.dot(l0.T, l1_delta)
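The loop above relies on a nonlin helper (and an input matrix X) defined outside this snippet. In this style of single-layer example, nonlin is usually a sigmoid whose deriv flag returns the slope at an already-activated value; a sketch under that assumption:

# Hypothetical nonlin helper assumed by the loop above.
import numpy as np

def nonlin(x, deriv=False):
    if deriv:
        # x is assumed to already be sigmoid output, so the slope is x * (1 - x)
        return x * (1 - x)
    return 1.0 / (1.0 + np.exp(-x))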
Example #10
def classifyVector(inX, weight):
    prob = logRegres.sigmoid(sum(inX * weight))
    if prob > 0.5:
        return 1.0
    else:
        return 0.0
Example #11
def classifyVector(inX, weights):
    prob = logRegres.sigmoid(np.sum(inX * weights))
    return 1.0 if prob > 0.5 else 0.0
Example #12
def predict(inX, weights):
    probability = sigmoid(sum(np.array(inX) * weights))
    if probability > 0.5:
        return 1
    else:
        return 0
Example #13
def classifyVector(inX, weights):
    prob = lr.sigmoid(sum(inX*weights))
    if prob > 0.5:
        return 1
    else:
        return 0
Example #14
def classifyVector(inX, weights):
    prob = lr.sigmoid(sum(inX * weights))
    if prob > 0.5:
        return 1
    else:
        return 0
Example #15
# coding=utf-8
import numpy
import logRegres

dataMat, labelMat = logRegres.loadDataSet()
weights = logRegres.gradAscent(dataMat, labelMat)
print(weights)
#logRegres.plotBestFit(weights.getA())
weights = logRegres.stocGradAscent0(numpy.array(dataMat), labelMat)
print('-- stochastic gradient ascent')
#logRegres.plotBestFit(weights)

weights = logRegres.stocGradAscent1(numpy.array(dataMat), labelMat)
print('-- improved stochastic gradient ascent')
logRegres.plotBestFit(weights)

import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111)

x = numpy.arange(-8.0, 8.0, 0.2)
print(logRegres.sigmoid(-0.3))
# apply an offset: not all data sets are split exactly at x = 0
y = [logRegres.sigmoid(xi - 0.1) for xi in x]
ax.plot(x, y)
plt.xlabel('x')
plt.ylabel('sigmoid(x - 0.1)')
#plt.show()
print('-- predict horse colic')
logRegres.multiTest()
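multiTest lives in the accompanying logRegres module and is not shown here; in the usual Machine Learning in Action setup it repeatedly trains on horseColicTraining.txt and averages the error rate on horseColicTest.txt. A rough sketch of that evaluation loop, with the file names, 21-feature column layout, and function signatures treated as assumptions:

# Hypothetical sketch of a colicTest-style evaluation; file names, the 21-feature
# layout, and the stocGradAscent1/classifyVector signatures are assumptions.
def colic_test():
    def load(path):
        data, labels = [], []
        with open(path) as f:
            for line in f:
                cols = line.strip().split('\t')
                data.append([float(v) for v in cols[:21]])
                labels.append(float(cols[21]))
        return numpy.array(data), labels

    train_x, train_y = load('horseColicTraining.txt')
    test_x, test_y = load('horseColicTest.txt')
    weights = logRegres.stocGradAscent1(train_x, train_y, 500)
    errors = sum(int(classifyVector(row, weights) != label)
                 for row, label in zip(test_x, test_y))
    return errors / float(len(test_y))

# average over several runs, as multiTest does
rates = [colic_test() for _ in range(10)]
print('average error rate: %.3f' % (sum(rates) / len(rates)))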