def testSelectedFeatures2():
    data1 = genRWNormalized()
    square = np.copy(data1)
    # square every feature column (the label column is excluded)
    for i in range(len(data1[0]) - 1):
        colToAdd = np.power(data1[:, i], 2)
        square = np.insert(square, -1, colToAdd, axis=1)
    # add the selected pairwise interaction terms once, outside the loop
    square = np.insert(square, -1, np.multiply(data1[:, 0], data1[:, 8]), axis=1)
    square = np.insert(square, -1, np.multiply(data1[:, 0], data1[:, 7]), axis=1)
    square = np.insert(square, -1, np.multiply(data1[:, 0], data1[:, 2]), axis=1)
    square = np.insert(square, -1, np.multiply(data1[:, 5], data1[:, 6]), axis=1)
    LRModel = lr.LogisticRegression(0.001, 100)
    featureSelection(LRModel, square, 3)
def testSelectedFeatures1():
    print("start testAdditionlSquaredFeatures()")
    LRModel = lr.LogisticRegression(0.001, 500)
    LDAModel = LDA.LDA()
    data1 = genRWNormalized()
    data2 = np.append(data1[:, [10, 1, 9, 6]],
                      np.array([data1[:, -1]]).T,
                      axis=1)
    data3 = addSquareFeature(data1, [10, 1, 9, 6])
    a1 = 0
    b1 = 0
    a2 = 0
    b2 = 0
    a3 = 0
    b3 = 0
    for i in range(3):
        np.random.shuffle(data1)
        np.random.shuffle(data2)
        np.random.shuffle(data3)
        a1 += LRKFoldValidation(LRModel, data1, 5)
        b1 += LDAKFoldValidation(LDAModel, data1, 5)
        a2 += LRKFoldValidation(LRModel, data2, 5)
        b2 += LDAKFoldValidation(LDAModel, data2, 5)
        a3 += LRKFoldValidation(LRModel, data3, 5)
        b3 += LDAKFoldValidation(LDAModel, data3, 5)
    print("Accuracy for lr in rw is {}".format(a1 / 3))
    print("Accuracy for LDA in rw is {}".format(b1 / 3))
    print("Accuracy for lr in rw is {}".format(a2 / 3))
    print("Accuracy for LDA in rw is {}".format(b2 / 3))
    print("Accuracy for lr in rw is {}".format(a3 / 3))
    print("Accuracy for LDA in rw is {}".format(b3 / 3))
Example #3
    def __init__(self, rng, input, n_in, n_hidden, n_out):
        # hidden layer
        self.hiddenLayer = HiddenLayer(rng=rng,
                                       input=input,
                                       n_in=n_in,
                                       n_out=n_hidden,
                                       activation=T.tanh)
        # output layer
        self.logRegressionLayer = logistic_regression.LogisticRegression(
            input=self.hiddenLayer.output, n_in=n_hidden, n_out=n_out)

        # L1 regularization term
        self.L1 = abs(self.hiddenLayer.W).sum() + abs(
            self.logRegressionLayer.W).sum()

        # L2 regularization term (sum of squared weights)
        self.L2_sqr = ((self.hiddenLayer.W)**2).sum() + (
            (self.logRegressionLayer.W)**2).sum()

        # loss (depends only on the output layer, so the logistic-regression loss can be reused)
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood

        # symbol for computing classification errors
        self.errors = self.logRegressionLayer.errors

        # model parameters
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params

        # keep track of the model input
        self.input = input
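
For context, a minimal sketch of how the attributes above are typically combined into a regularized training cost, following the common Theano MLP pattern (the instance name classifier, the target symbol y, and the L1_reg/L2_reg coefficients are assumptions, not part of this snippet):

L1_reg, L2_reg = 0.00, 0.0001  # assumed regularization strengths
cost = (classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr)
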
def testAlphaAndEpochs():
    rwClear = genRWClear()
    # grid search: learning rate 0.001 - 1, iterations 100 - 5000
    bestLearn = 0
    bestIte = 0
    learn = [0.001, 0.01, 0.1, 1]
    ite = [100, 500, 1000, 5000]
    max_acc = 0

    for i in learn:
        for j in ite:
            LRModel = lr.LogisticRegression(i, j)
            ave = 0.0
            for k in range(3):
                ac = LRKFoldValidation(LRModel, rwClear, 5)
                print("per k fold:", ac)
                ave += ac
            ave = ave / 3.0
            print("ave:", ave)
            if ave > max_acc:
                max_acc = ave
                bestLearn = i
                bestIte = j
            print(ave, " ", i, " ", j)
    print(bestLearn)
    print(bestIte)
    print(max_acc)
    def test_init(self):
        lr_model = lr.LogisticRegression(
            max_iter=10, tol=1e-6, learning_rate=1e-3, random_state=2
        )
        assert lr_model.max_iter == 10
        assert lr_model.tol == 1e-6
        assert lr_model.learning_rate == 1e-3
        assert lr_model.random_state == 2
Example #6
def test_predictions():
    X = np.array(dataset)[:, :-1]
    y = np.array(dataset)[:, -1]

    lr = logistic_regression.LogisticRegression(learning_rate=0.01,
                                                num_iterations=10**6,
                                                verbose=10000)
    lr.coef_ = coef  # inject pre-trained coefficients instead of fitting
    assert all(lr.predict(X) == np.array(y, dtype=bool))
Example #7
def testLRWithWine(a, epochs):
    data = genDataWOHeader(file_path1)
    qualityToCategory(data)
    np.random.shuffle(data)
    #data1= removeOutLiersByND(data2)
    testSet, trainSet = seperateTestSet(data)
    #trainSet=np.insert(trainSet, trainSet.shape[1]-1,np.ones((trainSet.shape[0],1),dtype=float),axis=1)
    aModel = lr.LogisticRegression(a, epochs)
    # note: k-fold validation below runs on the full dataset; the split above is unused
    return LRKFoldValidation(aModel, data, 5)
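
For reference, a hedged invocation of the helper above (the hyperparameter values mirror those used elsewhere on this page):

acc = testLRWithWine(0.001, 500)  # learning rate 0.001, 500 epochs
print("wine k-fold accuracy:", acc)
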
def MyLogRegTester(data, num_iters=10):
    X, y = [], []
    filename = "predictions.txt"
    for line in data:
        line1 = list(map(lambda x: float(x), line[1:len(line) - 1]))
        X.append(line1)
        y.append(line[len(line) - 1])

    X = np.asarray(X)
    X = LogReg.min_max_normalization(X)

    data = []
    for i in range(len(X)):
        dat = []
        dat.append(X[i])
        dat.append(y[i])
        data.append(dat)
    all_acc = 0
    print("My Logistic Regression implementation : ")
    file_exception = False
    try:
        f = open(filename, 'w')
        f.write("#y_predicted,y_actual\n")
        f.close()
    except Exception as e:
        print("Unable to do file operations. Error : ", e)
        file_exception = True
    for i in range(num_iters):
        random.shuffle(data)
        X, y = [], []
        for dat in data:
            X.append(dat[0])
            y.append(dat[1])
        X = np.asarray(X)
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.33)

        classifier = LogReg.LogisticRegression(X_train, y_train)
        classifier.train()
        #classifier.plot_cost_function()
        if not file_exception:
            acc = classifier.calculate_accuracy(X_test,
                                                y_test,
                                                write_predictions_to_file=True,
                                                filename=filename)
        else:
            acc = classifier.calculate_accuracy(X_test, y_test)
        print("Accuracy of trial ", i + 1, " : ", acc)
        all_acc += acc

    print("Mean Accuracy of", num_iters,
          "trials of My Logistic Regression implementation :",
          all_acc / num_iters)
        sign = int(0.5 * sign + 0.5)
        # print(sign, Category[i])
        if sign != Category[i][0]:
            count += 1
    return count / (2 * N)

N, d, Ntime, dl = 1000, 10, 100, 0.1
initialState = LinearInseparableData(N, d, Ntime, dl)
initialState.run()

xList = initialState.xList_4d # [x, y, x2, y2]
category = initialState.category
colorLine = initialState.Color

cycleMax, stepSize = 50000, 0.05
LRsolution = logistic_regression.LogisticRegression(xList, category, cycleMax, stepSize)
theta_solu = LRsolution.run()
xBound, yBound = boundaryLine(theta_solu)
plotResult(xList)

Example #10
    def test_has_converged(self, coef, X):
        lr_model = lr.LogisticRegression()
        p = lr.predict_proba(coef, X)
        assert lr_model._has_converged(coef, X, p)
        assert not lr_model._has_converged(np.array([1, 1000]), X, p)
Example #11
    def test_gradient_descent_computes_gradient(self, X, y):
        with patch_with_mock(lr, "logistic_gradient"):
            lr_model = lr.LogisticRegression(max_iter=5)
            lr_model.fit(X, y)
            assert lr.logistic_gradient.call_count >= 5
Example #12
def process_data(d):
    d = feature_keeper(d, ['pay_1', 'limit_bal', 'any_late_pay'], 'default')
    X, y = pipeline.get_X_y(d)
    return d, X, y


d_train, X_train, y_train = process_data(d_train)
d_valid, X_valid, y_valid = process_data(d_valid)
d_test, X_test, y_test = process_data(d_test)

## train model
scaler = pipeline.MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)

lr = logistic_regression.LogisticRegression(learning_rate=0.1,
                                            num_iterations=10**3,
                                            verbose=100)
lr.fit(X_train, y_train)

# plot relative feature importance: signed values, then sorted by magnitude
pd.Series(
    lr.coef_[1:] / np.abs(lr.coef_[1:]).sum(),
    index=d_train.columns[:-1]).plot.bar(title='Relative feature importance')
pd.Series(lr.coef_[1:] / np.abs(lr.coef_[1:]).sum(),
          index=d_train.columns[:-1]).abs().sort_values(
              ascending=False).plot.bar(
                  title='Relative feature importance (by magnitude)')

## validate model
X_valid_scaled = scaler.transform(X_valid)
pm = pipeline.PerformanceMetric(lr.predict(X_valid_scaled), y_valid)
print('f1:', pm.f1_score)
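
As a follow-up, a hedged final check on the held-out test split, mirroring the validation step above (every name used here is already defined in this snippet):

X_test_scaled = scaler.transform(X_test)
pm_test = pipeline.PerformanceMetric(lr.predict(X_test_scaled), y_test)
print('test f1:', pm_test.f1_score)
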
Example #13
    node_id = comm.Get_rank()
    nb_nodes = comm.size

    log = get_logger(node_id)

    # Initialize the graph (grid with nb_nodes nodes)
    grid_graph = graph.Grid(nb_nodes, args.seed, tau=args.tau)

    # Initialize the synthetic dataset (renamed to avoid shadowing the dataset module)
    data = dataset.ClassificationDataset(seed=args.seed,
                                         nb_points=args.nb_points_per_node *
                                         nb_nodes,
                                         d=args.d)

    # Initialize the model
    model = logistic_regression.LogisticRegression(data, nb_nodes * args.c)

    # Initialize the algorithm
    algo = adfs.ADFS(comm=comm,
                     seed=args.seed,
                     graph=grid_graph,
                     model=model,
                     log=log)

    # Run the algorithm
    algo.run(args.n_steps)

    # Plot the error
    if node_id == 0:
        min_error = min(algo.error)
        plot(algo.time, algo.error, min_error)
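
This fragment assumes it is launched under an MPI runner, e.g. mpiexec -n <nb_nodes> python <script>.py with the argparse flags referenced above (--seed, --tau, --nb_points_per_node, --d, --c, --n_steps); the script name and exact flag spellings are assumptions inferred from the args fields in the snippet.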
Example #14
import numpy as np
import logistic_regression as lg

train_data = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0]],
                      dtype=np.float16)
test_data = np.array([])

model = lg.LogisticRegression(lr=0.01, datas=train_data, epoch=10000, err=0.01)
init_w = np.copy(model.w)
model.train()
lg.draw(model=model, init_w=init_w, train_data=train_data, test_data=test_data)
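
Note that XOR is not linearly separable in the raw (x1, x2) features, so a plain logistic regression cannot fit this training set exactly; adding the product feature x1*x2 makes it separable. A hedged sketch of the augmented data (same layout as train_data: features first, label last):

train_aug = np.hstack([train_data[:, :2],
                       train_data[:, 0:1] * train_data[:, 1:2],
                       train_data[:, 2:3]]).astype(np.float16)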
Example #15
import theano  # needed below for theano.function
import load_mnist
import logistic_regression as lr
from theano import tensor as T

mnist_file = 'mnist.pkl.gz'

y = T.ivector('y')  # label vector
x = T.matrix('x')   # input minibatch
index = T.lscalar('index')  # minibatch index (undefined in the original fragment)
classifier = lr.LogisticRegression(input=x, n_in=28 * 28, n_out=10)

cost = classifier.negative_log_likelihood(y)

g_W = T.grad(cost=cost, wrt=classifier.W)
g_b = T.grad(cost=cost, wrt=classifier.b)

learning_rate = 0.13  # assumed value; not defined in this fragment
updates = [(classifier.W, classifier.W - learning_rate * g_W),
           (classifier.b, classifier.b - learning_rate * g_b)]

# train_set_x/train_set_y (and batch_size) are assumed to come from
# loading mnist_file via load_mnist in the original script
train_model = theano.function(
    inputs=[index],
    outputs=cost,
    updates=updates,
    givens={
        x: train_set_x[index * batch_size:(index + 1) * batch_size],
        y: train_set_y[index * batch_size:(index + 1) * batch_size]
    })

test_model = theano.function(
    inputs=[index],
    outputs=classifier.errors(y),
    givens={
        x: test_set_x[index * batch_size:(index + 1) * batch_size],
        y: test_set_y[index * batch_size:(index + 1) * batch_size]
    })
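
A minimal sketch of driving the two compiled functions above (n_epochs, n_train_batches, and n_test_batches are assumed to be derived from the dataset sizes and batch_size in the original script):

for epoch in range(n_epochs):
    for minibatch_index in range(n_train_batches):
        minibatch_cost = train_model(minibatch_index)
    test_errors = [test_model(i) for i in range(n_test_batches)]
    print('epoch', epoch, 'mean test error:', sum(test_errors) / len(test_errors))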


LRModel = lr.LogisticRegression(0.001, 500)
LDAModel = LDA.LDA()
rwNormalized = genRWNormalized()
cancerNormalized = genCancerNormalized()
print(LRKFoldValidation(LRModel, cancerNormalized, 5))
print(LDAKFoldValidation(LDAModel, cancerNormalized, 5))
print(LRKFoldValidation(LRModel, rwNormalized, 5))
print(LDAKFoldValidation(LDAModel, rwNormalized, 5))
def testDataPreprocess():
    rwData = genRW()
    cancerData = genCancer()
    rwNormalized = genRWNormalized()
    cancerNormalized = genCancerNormalized()
    rwRemovedOL = genRWRemovedOL()
    cancerRemovedOL = genCancerRemovedOL()
    rwClear = genRWClear()
    cancerClear = genCancerClear()
    LRModel = lr.LogisticRegression(0.001, 500)
    LDAModel = LDA.LDA()

    # raw data
    a = 0
    b = 0
    c = 0
    d = 0
    for i in range(3):
        np.random.shuffle(rwData)
        np.random.shuffle(cancerData)
        a += LRKFoldValidation(LRModel, rwData, 5)
        b += LDAKFoldValidation(LDAModel, rwData, 5)
        c += LRKFoldValidation(LRModel, cancerData, 5)
        d += LDAKFoldValidation(LDAModel, cancerData, 5)

    print(a / 3)
    print(b / 3)
    print(c / 3)
    print(d / 3)

    # normalized data
    a2 = 0
    b2 = 0
    c2 = 0
    d2 = 0
    for i in range(3):
        np.random.shuffle(rwNormalized)
        np.random.shuffle(cancerNormalized)
        a2 += LRKFoldValidation(LRModel, rwNormalized, 5)
        b2 += LDAKFoldValidation(LDAModel, rwNormalized, 5)
        c2 += LRKFoldValidation(LRModel, cancerNormalized, 5)
        d2 += LDAKFoldValidation(LDAModel, cancerNormalized, 5)
    print(a2 / 3)
    print(b2 / 3)
    print(c2 / 3)
    print(d2 / 3)

    # "clear" data (genRWClear / genCancerClear)
    a3 = 0
    b3 = 0
    c3 = 0
    d3 = 0
    for i in range(3):
        np.random.shuffle(rwClear)
        np.random.shuffle(cancerClear)
        a3 += LRKFoldValidation(LRModel, rwClear, 5)
        b3 += LDAKFoldValidation(LDAModel, rwClear, 5)
        c3 += LRKFoldValidation(LRModel, cancerClear, 5)
        d3 += LDAKFoldValidation(LDAModel, cancerClear, 5)
    print(a3 / 3)
    print(b3 / 3)
    print(c3 / 3)
    print(d3 / 3)

    # data with outliers removed
    a4 = 0
    b4 = 0
    c4 = 0
    d4 = 0
    for i in range(3):
        np.random.shuffle(rwRemovedOL)
        np.random.shuffle(cancerRemovedOL)
        a4 += LRKFoldValidation(LRModel, rwRemovedOL, 5)
        b4 += LDAKFoldValidation(LDAModel, rwRemovedOL, 5)
        c4 += LRKFoldValidation(LRModel, cancerRemovedOL, 5)
        d4 += LDAKFoldValidation(LDAModel, cancerRemovedOL, 5)
    print(a4 / 3)
    print(b4 / 3)
    print(c4 / 3)
    print(d4 / 3)
Example #18
def featureSelection(data, isLR):
    selectedFeatureNum = []
    selectedFeatureArray = -1
    bestAccuracyAll = 0
    y_2d = np.array([data[:, -1]]).T
    #print(y_2d)
    for i in range(data.shape[1] - 1):
        featureToAdd = -1
        bestAccuracy = 0
        column_2d = -1
        print("select feature{}".format(i))
        if i == 0:
            for j in range(data.shape[1] - 1):
                if j not in selectedFeatureNum:
                    column_2d = np.array([data[:, j]]).T
                    nums = selectedFeatureNum + [j]

                    # NOTE: the fold count (5) below is hardcoded and should be parameterized
                    #print(np.concatenate((column_2d,y_2d), axis = 1))
                    if isLR:
                        model = lr.LogisticRegression(0.001, 500)
                        accuracy = LRKFoldValidation(
                            model, np.concatenate((column_2d, y_2d), axis=1),
                            5)
                    else:
                        model = LDA.LDA()
                        accuracy = LDAKFoldValidation(
                            model, np.concatenate((column_2d, y_2d), axis=1),
                            5)

                    print("Using feature(s){} accuracy is{}".format(
                        nums, accuracy))
                    if accuracy >= bestAccuracy:
                        bestAccuracy = accuracy
                        featureToAdd = j
            selectedFeatureArray = np.array([data[:, featureToAdd]]).T
            bestAccuracyAll = bestAccuracy
            selectedFeatureNum.append(featureToAdd)
            continue
        else:
            #try add feature from the rest of set
            for j in range(data.shape[1] - 1):
                if j not in selectedFeatureNum:
                    column_2d = np.array([data[:, j]]).T
                    nums = selectedFeatureNum + [j]

                    # NOTE: the fold count (5) below is hardcoded and should be parameterized
                    #print(np.concatenate((selectedFeatureArray, column_2d , y_2d), axis = 1))
                    if isLR:
                        model = lr.LogisticRegression(0.001, 500)
                        accuracy = LRKFoldValidation(
                            model,
                            np.concatenate(
                                (selectedFeatureArray, column_2d, y_2d),
                                axis=1), 5)
                    else:
                        model = LDA.LDA()
                        accuracy = LDAKFoldValidation(
                            model,
                            np.concatenate(
                                (selectedFeatureArray, column_2d, y_2d),
                                axis=1), 5)
                    print("Using feature(s){} accuracy is{}".format(
                        nums, accuracy))
                    if accuracy >= bestAccuracy:
                        bestAccuracy = accuracy
                        featureToAdd = j

        # stop when the additional feature no longer improves accuracy
        if bestAccuracyAll >= bestAccuracy:
            print("maxima reached")
            break
        else:
            # add the additional feature
            bestAccuracyAll = bestAccuracy
            selectedFeatureNum.append(featureToAdd)
            selectedFeatureArray = np.concatenate(
                (selectedFeatureArray, np.array([data[:, featureToAdd]]).T),
                axis=1)
    print(
        "feature selection ended, best performing features are {}, the accuracy is {}"
        .format(selectedFeatureNum, bestAccuracyAll))
    return selectedFeatureNum, selectedFeatureArray
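
A hedged usage sketch of the routine above, reusing the dataset helper that appears elsewhere on this page:

data = genRWNormalized()
nums, features = featureSelection(data, isLR=True)  # forward selection scored by logistic regression
print("selected columns:", nums)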
Example #19
# A simple example

import logistic_regression as lr

if __name__ == '__main__':

    # Replace LABEL_NAME_0 and LABEL_NAME_1 with the label values of your dataset, e.g. 'Iris-setosa' or 'Iris-versicolor'.
    logistic_regression = lr.LogisticRegression('Iris-setosa',
                                                'Iris-versicolor')

    # Replace DATASET_PATH with the path to your dataset file, e.g. 'iris.data'.
    # After this step, the data will be loaded and initialized.
    logistic_regression.set_data_from_file('iris.data')

    # The calculate method implements logistic regression fitted with Newton's method.
    logistic_regression.calculate()

    # This step generates a simple plot of the input data's labels and feature vectors, and draws the line representing the logistic-regression decision boundary.
    logistic_regression.show()