def logistic_l1():
    """Fit scikit-learn's LogisticRegression on the training data and score the test data.

    The token list comes from nb.read('tokens'); vec.func2 and vec.func3 turn
    './data/training' and './data/test' into feature rows.  The shapes of both
    feature matrices are printed.  Each test-file entry is paired with its
    predicted label and the resulting table is passed to nb.sta_result
    together with the converted 'category_nb_eventmodel' data, the stored
    'result' data and the path './data/logistic_l1.csv'.

    NOTE(review): another definition of logistic_l1 appears later in this
    file and replaces this one once the module has been loaded.
    """
    train_root = './data/training'
    test_root = './data/test'

    vocabulary = list(nb.read('tokens'))
    # The third value returned by vec.func2 is not needed here.
    features, labels, _ = vec.func2(train_root, vocabulary)
    features = np.array(features)
    labels = np.array(labels)
    print(features.shape)

    # NOTE(review): the function name says L1, but the penalty requested
    # here is 'l2' - confirm which regularisation is intended.
    model = LogisticRegression(penalty='l2')
    model.fit(features, labels)

    known_categories = nb.read('category')
    # The first value returned by vec.func3 is not needed here.
    _, test_features, test_names = vec.func3(
        test_root, vocabulary, known_categories)
    test_features = np.array(test_features)
    print(test_features.shape)

    predicted = np.array(model.predict(test_features))
    # One row per test file: (file entry, predicted label).
    table = np.column_stack((np.array(test_names), predicted))

    converted = nb.convert(nb.read('category_nb_eventmodel'))
    expected = nb.read('result')
    nb.sta_result(table, converted, expected, './data/logistic_l1.csv')
def logistic_l1():
    """Fit scikit-learn's LogisticRegression on the training data and score the test data.

    The token list comes from nb.read('tokens'); vec.func2 and vec.func3 turn
    './data/training' and './data/test' into feature rows.  The shapes of both
    feature matrices are printed.  Each test-file entry is paired with its
    predicted label and the resulting table is passed to nb.sta_result
    together with the converted 'category_nb_eventmodel' data, the stored
    'result' data and the path './data/logistic_l1.csv'.

    NOTE(review): an earlier definition of logistic_l1 exists in this file;
    this later one is the definition left bound after the module is loaded.
    """
    train_root = './data/training'
    test_root = './data/test'

    vocabulary = list(nb.read('tokens'))
    # The third value returned by vec.func2 is not needed here.
    features, labels, _ = vec.func2(train_root, vocabulary)
    features = np.array(features)
    labels = np.array(labels)
    print(features.shape)

    # NOTE(review): the function name says L1, but the penalty requested
    # here is 'l2' - confirm which regularisation is intended.
    model = LogisticRegression(penalty='l2')
    model.fit(features, labels)

    known_categories = nb.read('category')
    # The first value returned by vec.func3 is not needed here.
    _, test_features, test_names = vec.func3(
        test_root, vocabulary, known_categories)
    test_features = np.array(test_features)
    print(test_features.shape)

    predicted = np.array(model.predict(test_features))
    # One row per test file: (file entry, predicted label).
    table = np.column_stack((np.array(test_names), predicted))

    converted = nb.convert(nb.read('category_nb_eventmodel'))
    expected = nb.read('result')
    nb.sta_result(table, converted, expected, './data/logistic_l1.csv')
def logistic_own(num_classes=10, alpha=0.0001, maxitem=100):
    """Hand-written one-vs-rest logistic regression trained by batch gradient descent.

    Loads 'train_x', 'train_y', 'test_x' and 'test_file' through nb.read,
    fits one binary classifier per class, labels every test row with the
    class whose classifier gives the highest linear score, and passes the
    (file entry, label) table to nb.sta_result.

    The previous decision rule assigned a row to the last class whose score
    was positive (and to class 0 when no score was positive), so the result
    depended on class order rather than on which classifier was most
    confident.  Taking the arg-max over all class scores removes that bias.

    Args:
        num_classes: number of classes; labels are matched against
            0 .. num_classes - 1.
        alpha: gradient-descent step size.
        maxitem: number of gradient-descent iterations per class.

    NOTE(review): another definition of logistic_own appears later in this
    file and replaces this one once the module has been loaded.
    """
    train_x = nb.read('train_x')
    train_y = nb.read('train_y')
    test_x = nb.read('test_x')
    test_file = nb.read('test_file')

    m, n = train_x.shape
    # Prepend a column of ones so that weight[0] acts as the intercept.
    train_x = np.mat(np.column_stack((np.ones((m, 1)), train_x)))
    test_x = np.mat(np.column_stack((np.ones((len(test_x), 1)), test_x)))
    # assumes train_y is a flat sequence of m labels, giving an (m, 1)
    # column after the transpose - TODO confirm against nb.read('train_y')
    train_y = np.mat(train_y).transpose()

    # scores[r, c] holds the linear score of test row r under classifier c.
    scores = np.zeros((test_x.shape[0], num_classes))

    # Multi-class prediction is realised through several binary classifiers.
    for i in range(num_classes):
        # 1 where the label equals the current class, 0 elsewhere.
        binary_y = np.mat((train_y == i).astype(int))
        weight = np.mat(np.ones((n + 1, 1)))
        for _ in range(maxitem):
            h = sigmoid(train_x * weight)
            # The original author noted that the cost cannot simply be
            # computed from the closed-form formula and must be checked,
            # hence calj.  Its value does not influence the update.
            # NOTE(review): drop this call if calj has no side effects.
            calj(binary_y, h, m)
            weight -= alpha * (train_x.transpose() * (h - binary_y))
        scores[:, i] = np.asarray(test_x * weight).ravel()

    # Pick the most confident classifier for every test row.
    predict = np.argmax(scores, axis=1).astype(int).reshape(-1, 1)
    predict = np.column_stack((np.array(test_file), predict))

    category_convert = nb.convert(nb.read('category_nb_eventmodel'))
    result = nb.read('result')
    # NOTE(review): the output path is shared with the other logistic_*
    # functions in this file - confirm this is intended.
    path = './data/logistic_l1.csv'
    nb.sta_result(predict, category_convert, result, path)
def logistic_own(num_classes=10, alpha=0.0001, maxitem=100):
    """Hand-written one-vs-rest logistic regression trained by batch gradient descent.

    Loads 'train_x', 'train_y', 'test_x' and 'test_file' through nb.read,
    fits one binary classifier per class, labels every test row with the
    class whose classifier gives the highest linear score, and passes the
    (file entry, label) table to nb.sta_result.

    The previous decision rule assigned a row to the last class whose score
    was positive (and to class 0 when no score was positive), so the result
    depended on class order rather than on which classifier was most
    confident.  Taking the arg-max over all class scores removes that bias.

    Args:
        num_classes: number of classes; labels are matched against
            0 .. num_classes - 1.
        alpha: gradient-descent step size.
        maxitem: number of gradient-descent iterations per class.

    NOTE(review): an earlier definition of logistic_own exists in this file;
    this later one is the definition left bound after the module is loaded.
    """
    train_x = nb.read('train_x')
    train_y = nb.read('train_y')
    test_x = nb.read('test_x')
    test_file = nb.read('test_file')

    m, n = train_x.shape
    # Prepend a column of ones so that weight[0] acts as the intercept.
    train_x = np.mat(np.column_stack((np.ones((m, 1)), train_x)))
    test_x = np.mat(np.column_stack((np.ones((len(test_x), 1)), test_x)))
    # assumes train_y is a flat sequence of m labels, giving an (m, 1)
    # column after the transpose - TODO confirm against nb.read('train_y')
    train_y = np.mat(train_y).transpose()

    # scores[r, c] holds the linear score of test row r under classifier c.
    scores = np.zeros((test_x.shape[0], num_classes))

    # Multi-class prediction is realised through several binary classifiers.
    for i in range(num_classes):
        # 1 where the label equals the current class, 0 elsewhere.
        binary_y = np.mat((train_y == i).astype(int))
        weight = np.mat(np.ones((n + 1, 1)))
        for _ in range(maxitem):
            h = sigmoid(train_x * weight)
            # The original author noted that the cost cannot simply be
            # computed from the closed-form formula and must be checked,
            # hence calj.  Its value does not influence the update.
            # NOTE(review): drop this call if calj has no side effects.
            calj(binary_y, h, m)
            weight -= alpha * (train_x.transpose() * (h - binary_y))
        scores[:, i] = np.asarray(test_x * weight).ravel()

    # Pick the most confident classifier for every test row.
    predict = np.argmax(scores, axis=1).astype(int).reshape(-1, 1)
    predict = np.column_stack((np.array(test_file), predict))

    category_convert = nb.convert(nb.read('category_nb_eventmodel'))
    result = nb.read('result')
    # NOTE(review): the output path is shared with the other logistic_*
    # functions in this file - confirm this is intended.
    path = './data/logistic_l1.csv'
    nb.sta_result(predict, category_convert, result, path)
def logistic_x():
    """Fit an L1-penalised scikit-learn LogisticRegression on the output of preprocess().

    preprocess() supplies the training features/labels and the test
    features/file entries.  Each test-file entry is paired with its predicted
    label and the table is passed to nb.sta_result together with the
    converted 'category_nb_eventmodel' data, the stored 'result' data and the
    path './data/logistic_l1.csv'.

    NOTE(review): another definition of logistic_x appears later in this
    file and replaces this one once the module has been loaded.
    """
    # The third and fourth values returned by preprocess() are not used:
    # both are re-read through nb.read below.
    features, labels, _, _, test_features, test_names = preprocess()

    # NOTE(review): recent scikit-learn releases default to a solver that
    # rejects penalty='l1'; a compatible solver (e.g. 'liblinear') may need
    # to be passed explicitly after an upgrade - confirm the installed version.
    model = LogisticRegression(penalty='l1')
    model.fit(features, labels)

    predicted = np.array(model.predict(test_features))
    # One row per test file: (file entry, predicted label).
    table = np.column_stack((test_names, predicted))

    converted = nb.convert(nb.read('category_nb_eventmodel'))
    expected = nb.read('result')
    nb.sta_result(table, converted, expected, './data/logistic_l1.csv')
def logistic_x():
    """Fit an L1-penalised scikit-learn LogisticRegression on the output of preprocess().

    preprocess() supplies the training features/labels and the test
    features/file entries.  Each test-file entry is paired with its predicted
    label and the table is passed to nb.sta_result together with the
    converted 'category_nb_eventmodel' data, the stored 'result' data and the
    path './data/logistic_l1.csv'.

    NOTE(review): an earlier definition of logistic_x exists in this file;
    this later one is the definition left bound after the module is loaded.
    """
    # The third and fourth values returned by preprocess() are not used:
    # both are re-read through nb.read below.
    features, labels, _, _, test_features, test_names = preprocess()

    # NOTE(review): recent scikit-learn releases default to a solver that
    # rejects penalty='l1'; a compatible solver (e.g. 'liblinear') may need
    # to be passed explicitly after an upgrade - confirm the installed version.
    model = LogisticRegression(penalty='l1')
    model.fit(features, labels)

    predicted = np.array(model.predict(test_features))
    # One row per test file: (file entry, predicted label).
    table = np.column_stack((test_names, predicted))

    converted = nb.convert(nb.read('category_nb_eventmodel'))
    expected = nb.read('result')
    nb.sta_result(table, converted, expected, './data/logistic_l1.csv')
def sto_logistic(num_classes=10, alpha=0.001, maxitem=5000):
    """Hand-written one-vs-rest logistic regression trained by stochastic gradient descent.

    Loads 'train_x', 'train_y', 'test_x' and 'test_file' through nb.read,
    fits one binary classifier per class using one randomly chosen training
    row per update, labels every test row with the class whose classifier
    gives the highest linear score, and passes the (file entry, label) table
    to nb.sta_result.

    The previous decision rule assigned a row to the last class whose score
    was positive (and to class 0 when no score was positive), so the result
    depended on class order rather than on which classifier was most
    confident.  Taking the arg-max over all class scores removes that bias.

    Args:
        num_classes: number of classes; labels are matched against
            0 .. num_classes - 1.
        alpha: gradient-descent step size.
        maxitem: number of single-row updates per class.

    NOTE(review): another definition of sto_logistic appears later in this
    file and replaces this one once the module has been loaded.
    """
    train_x = nb.read('train_x')
    train_y = nb.read('train_y')
    test_x = nb.read('test_x')
    test_file = nb.read('test_file')

    m, n = train_x.shape
    # Prepend a column of ones so that weight[0] acts as the intercept.
    train_x = np.mat(np.column_stack((np.ones((m, 1)), train_x)))
    test_x = np.mat(np.column_stack((np.ones((len(test_x), 1)), test_x)))
    # assumes train_y is a flat sequence of m labels, giving an (m, 1)
    # column after the transpose - TODO confirm against nb.read('train_y')
    train_y = np.mat(train_y).transpose()

    # scores[r, c] holds the linear score of test row r under classifier c.
    scores = np.zeros((test_x.shape[0], num_classes))

    # Multi-class prediction is realised through several binary classifiers.
    for i in range(num_classes):
        # 1 where the label equals the current class, 0 elsewhere.
        binary_y = np.mat((train_y == i).astype(int))
        weight = np.mat(np.ones((n + 1, 1)))
        for _ in range(maxitem):
            # Each update uses a single training row drawn at random.
            index = random.randrange(m)
            row = train_x[index]
            error = sigmoid(row * weight) - binary_y[index]
            weight -= alpha * (row.transpose() * error)
        scores[:, i] = np.asarray(test_x * weight).ravel()

    # Pick the most confident classifier for every test row.
    predict = np.argmax(scores, axis=1).astype(int).reshape(-1, 1)
    predict = np.column_stack((np.array(test_file), predict))

    category_convert = nb.convert(nb.read('category_nb_eventmodel'))
    result = nb.read('result')
    # NOTE(review): the output path is shared with the other logistic_*
    # functions in this file - confirm this is intended.
    path = './data/logistic_l1.csv'
    nb.sta_result(predict, category_convert, result, path)
def sto_logistic(num_classes=10, alpha=0.001, maxitem=5000):
    """Hand-written one-vs-rest logistic regression trained by stochastic gradient descent.

    Loads 'train_x', 'train_y', 'test_x' and 'test_file' through nb.read,
    fits one binary classifier per class using one randomly chosen training
    row per update, labels every test row with the class whose classifier
    gives the highest linear score, and passes the (file entry, label) table
    to nb.sta_result.

    The previous decision rule assigned a row to the last class whose score
    was positive (and to class 0 when no score was positive), so the result
    depended on class order rather than on which classifier was most
    confident.  Taking the arg-max over all class scores removes that bias.

    Args:
        num_classes: number of classes; labels are matched against
            0 .. num_classes - 1.
        alpha: gradient-descent step size.
        maxitem: number of single-row updates per class.

    NOTE(review): an earlier definition of sto_logistic exists in this file;
    this later one is the definition left bound after the module is loaded.
    """
    train_x = nb.read('train_x')
    train_y = nb.read('train_y')
    test_x = nb.read('test_x')
    test_file = nb.read('test_file')

    m, n = train_x.shape
    # Prepend a column of ones so that weight[0] acts as the intercept.
    train_x = np.mat(np.column_stack((np.ones((m, 1)), train_x)))
    test_x = np.mat(np.column_stack((np.ones((len(test_x), 1)), test_x)))
    # assumes train_y is a flat sequence of m labels, giving an (m, 1)
    # column after the transpose - TODO confirm against nb.read('train_y')
    train_y = np.mat(train_y).transpose()

    # scores[r, c] holds the linear score of test row r under classifier c.
    scores = np.zeros((test_x.shape[0], num_classes))

    # Multi-class prediction is realised through several binary classifiers.
    for i in range(num_classes):
        # 1 where the label equals the current class, 0 elsewhere.
        binary_y = np.mat((train_y == i).astype(int))
        weight = np.mat(np.ones((n + 1, 1)))
        for _ in range(maxitem):
            # Each update uses a single training row drawn at random.
            index = random.randrange(m)
            row = train_x[index]
            error = sigmoid(row * weight) - binary_y[index]
            weight -= alpha * (row.transpose() * error)
        scores[:, i] = np.asarray(test_x * weight).ravel()

    # Pick the most confident classifier for every test row.
    predict = np.argmax(scores, axis=1).astype(int).reshape(-1, 1)
    predict = np.column_stack((np.array(test_file), predict))

    category_convert = nb.convert(nb.read('category_nb_eventmodel'))
    result = nb.read('result')
    # NOTE(review): the output path is shared with the other logistic_*
    # functions in this file - confirm this is intended.
    path = './data/logistic_l1.csv'
    nb.sta_result(predict, category_convert, result, path)