Example #1
def detectChange(j, f):
    print("change {0} started".format(j));

    M = 2;
    h = 12;
    timespan = 6;
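    # size: samples per hour, assuming f is the sampling interval in seconds;
    # speed collects the fitted first-derivative values across all windows.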
    size, speed = int(3600 / f), [];
    data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/data.npy".format(f)));
    marks = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/marks.npy".format(f)));
    y1 = data[:size * (h + 1), j];
    X1 = np.mat(np.arange(y1.shape[0])).T;
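    # regression spline over the first (h + 1) hours: roughly one knot every
    # `timespan` minutes; M is the spline order (M = 2 and M = 3 are handled below).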
    f1 = LinearRegression.RegressionSplineFunction(int((h + 1) * 60 / timespan) + M - 2, M);
    m1 = LinearRegression.LinearRegression().fit(X1, y1, [f1]);
    sY1 = m1.predictValue(X1);
    X1 = X1[:-size, :];
    y1 = y1[:-size, :];
    sY1 = sY1[:-size, :];
    if M == 3:
        speed.extend(getSpeedM3(m1.beta, f1.knots, X1).A.flatten().tolist());
    else:
        speed.extend(getSpeedM2(m1.beta, f1.knots, X1).A.flatten().tolist());

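    # slide over the remaining history one h-hour block at a time, padding each
    # fit with one hour on both sides and trimming it back to reduce edge effects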
    for i in range(1, math.floor((data.shape[0] - size) / (size * h))):
        y2 = data[i * size * h - size:(i + 1) * size * h + size, j];
        X2 = np.mat(np.arange(y2.shape[0])).T;
        f2 = LinearRegression.RegressionSplineFunction(int((h + 2) * 60 / timespan) + M - 2, M);
        m2 = LinearRegression.LinearRegression().fit(X2, y2, [f2]);
        sY2 = m2.predictValue(X2);
        X2 = X2[size:-size, :];
        y2 = y2[size:-size, :];
        sY2 = sY2[size:-size, :];
        if M == 3:
            speed.extend(getSpeedM3(m2.beta, f2.knots, X2).A.flatten().tolist());
        else:
            speed.extend(getSpeedM2(m2.beta, f2.knots, X2).A.flatten().tolist());

        plt.figure(1, (12, 8));
        plt.get_current_fig_manager().window.maximize();
        plt.subplot(211);
        plt.title(str(i - 1));
        plt.plot(X1.A.flatten(), y1.A.flatten(), "-x");
        plt.plot(X1.A.flatten(), sY1.A.flatten(), color = "red");
        plt.subplot(212);
        plt.title(str(i));
        plt.plot(X2.A.flatten(), y2.A.flatten(), "-x");
        plt.plot(X2.A.flatten(), sY2.A.flatten(), color = "red");
        plt.show(block = True);
        plt.close();

        X1, y1, sY1 = X2, y2, sY2;
    print("change history completed.");

    speed = np.mat(speed).T;
    speedMean, speedStd = speed.mean(), speed.std();
    plt.figure(1, (12, 8));
    plt.get_current_fig_manager().window.maximize();
    plt.hist(speed.A.flatten(), bins = 1000);
    plt.show(block = True);
    plt.close();
Example #2
def test_Linear_Regression():
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target

    print(X.shape)  # (506, 13)

    X = X[y < 50.0]
    y = y[y < 50.0]

    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    # Gradient descent needs standardized data; otherwise the computation overflows
    scaler = StandardScaler()
    scaler.fit(X_train)
    standard_X_train = scaler.transform(X_train)
    standard_X_test = scaler.transform(X_test)
    print('------------------- Normal equation -----------------------------')
    # Solve linear regression via the normal equation
    start_time = datetime.datetime.now()
    reg = LinearRegression()
    reg.fit_normal(X_train, y_train)  # no normalization needed
    end_time = datetime.datetime.now()
    print(reg.coef_)
    print(reg.intercept_)
    print(reg.score(X_test, y_test))
    print('use time:', (end_time - start_time).microseconds)
    print('------------------- Batch gradient descent ------------------------------')
    # Solve linear regression via batch gradient descent
    start_time = datetime.datetime.now()
    reg2 = LinearRegression()
    reg2.fit_gd(standard_X_train, y_train)
    end_time = datetime.datetime.now()
    print(reg2.coef_)
    print(reg2.intercept_)
    print(reg2.score(standard_X_test, y_test))
    print('use time:', (end_time - start_time).microseconds)
    print('------------------- Stochastic gradient descent ------------------------------')

    # Solve linear regression via stochastic gradient descent
    start_time = datetime.datetime.now()
    reg3 = LinearRegression()
    reg3.fit_sgd(standard_X_train, y_train, n_iters=100)
    end_time = datetime.datetime.now()
    print(reg3.coef_)  # coefficients (slope)
    print(reg3.intercept_)  # intercept
    print(reg3.score(standard_X_test, y_test))
    print('use time:', (end_time - start_time).microseconds)
    print('------------------ Mini-batch stochastic gradient descent --------------------------')

    # Mini-batch stochastic gradient descent
    start_time = datetime.datetime.now()
    reg4 = LinearRegression()
    reg4.fit_msgd(standard_X_train, y_train, batch_size=5)
    end_time = datetime.datetime.now()
    print(reg4.coef_)  # coefficients (slope)
    print(reg4.intercept_)  # intercept
    print(reg4.score(standard_X_test, y_test))
    print('use time:', (end_time - start_time).microseconds)
Example #3
def getLinearFactors(self, node):
    """
    Get linear factors for Q_h and Q_a and set node.f_linear = [[Q_h factors], [Q_a factors]].
    :param node: the leaf node whose instances are used to fit the linear q-value model
    :return:
    """
    train_X = [instance.currentObs for instance in node.instances]
    train_Y = [instance.qValue for instance in node.instances]
    l_rate = 0.0001
    n_epochs = 1000
    count = 0
    max_diff = 10000
    tot = None
    # Warm-start from this node's (or its parent's) previous linear factors when available.
    if node.f_linear is not None:
        tot = np.transpose(node.f_linear)
        W = np.delete(tot, self.n_dim, 0)
        b = np.array([tot[self.n_dim]])
        count += 1
    elif node.parent and node.parent.f_linear is not None:
        tot = np.transpose(node.parent.f_linear)
        W = np.delete(tot, self.n_dim, 0)
        b = np.array([tot[self.n_dim]])
    while count < TRIES:  # TRIES is a module-level constant
        if tot is not None:
            # Retry with epochs and learning rate scaled down by 10 each attempt.
            with tf.Session() as sess:
                LR = lr.LinearRegression(training_epochs=int(n_epochs / 10**count),
                                         learning_rate=l_rate / 10**count)
                LR.read_weights(weights=W, bias=b)
                LR.linear_regression_model()
                temp_diff, temp_W, temp_b = LR.gradient_descent(
                    sess=sess, train_X=train_X, train_Y=train_Y)
        else:
            with tf.Session() as sess:
                LR = lr.LinearRegression(training_epochs=n_epochs,
                                         learning_rate=l_rate)
                LR.read_weights()
                LR.linear_regression_model()
                temp_diff, temp_W, temp_b = LR.gradient_descent(
                    sess=sess, train_X=train_X, train_Y=train_Y)
        # Keep the weights from the attempt with the smallest training error.
        if temp_diff < max_diff:
            W = temp_W
            b = temp_b
            max_diff = temp_diff
        count += 1
    node.f_linear = np.concatenate((np.transpose(W), np.transpose(b)), axis=1)
    print("finish linear, node: " + str(node.idx))
Example #4
def main():

    data_set = load_boston()

    train_data, train_target, test_data, test_target = LR.split_data(data_set)
    num_features = train_data.shape[1]

    new_train_data = train_data.copy()
    new_test_data = test_data.copy()
    
    for i in range(num_features):
        for j in range(i, num_features):
            new_column = train_data[:, i] * train_data[:, j]
            new_column = new_column.reshape(new_column.shape[0], 1)
            new_train_data = np.append(new_train_data, new_column, 1)

            new_test_column = test_data[:, i] * test_data[:, j]
            new_test_column = new_test_column.reshape(new_test_column.shape[0], 1)
            new_test_data = np.append(new_test_data, new_test_column, 1)
    
    lr = LR.LinearRegression()
    lr.fit(new_train_data, train_target)
    
    mse_test = lr.mse(new_test_data, test_target)
    mse_train = lr.mse(new_train_data, train_target)

    print "\nSol. 3.4"
    print "Linear Regression"
    print "{:^15}|{:^15}".format("Input Data", "MSE")
    print "-"*30
    print "{:^15}|{:^15.7}".format("test_data", mse_test)
    print "{:^15}|{:^15.7}".format("train_data", mse_train)
    print "\n"
Example #5
def main():

    data_set = load_boston()

    train_data, train_target, test_data, test_target = LR.split_data(data_set)

    min_MSE = float("inf")
    min_combo = None

    calculated_combos = []

    for combo in set(permutations(range(train_data.shape[1]), 4)):
        if sorted(combo) not in calculated_combos:
            calculated_combos.append(sorted(combo))
            lr = LR.LinearRegression()
            lr.fit(train_data[:, sorted(combo)], train_target)
            MSE = lr.mse(test_data[:, sorted(combo)], test_target)

            if min_MSE > MSE:
                min_MSE = MSE
                min_combo = combo

    print "Brute Force"
    print "Best Combination : [{}], by 1-index: {} with MSE = {:.7}".format(
        ", ".join([data_set.feature_names[x] for x in min_combo]),
        [x + 1 for x in min_combo], min_MSE)
Example #6
def testSpeed():
    startIndex, endIndex = 93, 118;
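    # NOTE: the sample values were elided from this snippet; populate `data` before running.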
    data = [];
    y = np.mat(data).T;

    h, timespan, M = 1, 6, 2;
    X = np.mat(np.arange(y.shape[0])).T;
    f = LinearRegression.RegressionSplineFunction(int(h * 60 / timespan) + M - 2, M);
    m = LinearRegression.LinearRegression().fit(X, y, [f]);
    yHeat = m.predictValue(X);

    # speed1 = getSpeedM3(m.beta, f.knots, X[startIndex: endIndex, :]);
    # print(speed1.A.flatten().tolist());

    plt.figure(1, (12, 8));
    plt.get_current_fig_manager().window.maximize();
    plt.subplot(211);
    plt.plot(X.A.flatten(), y.A.flatten(), "-xb");
    plt.plot(X.A.flatten(), yHeat.A.flatten(), "-r");
    plt.plot(X[startIndex: endIndex, :].A.flatten(), y[startIndex: endIndex, :].A.flatten(), "or");
    plt.subplot(212);
    plt.plot(X[startIndex: endIndex, :].A.flatten(), y[startIndex: endIndex, :].A.flatten(), "-xb");
    plt.plot(X[startIndex: endIndex, :].A.flatten(), yHeat[startIndex: endIndex, :].A.flatten(), "-r");
    plt.show(block=True);
    plt.close();
Example #7
def data_handler():
    index_list = list()
    train_list = list()
    test_list = list()

    train_num, test_num = data_partitions[0], data_partitions[1]

    print(train_num, test_num)

    data = pd.read_csv(filename, delimiter=',', dtype=None, header=None)

    # Create a numpy array for manipulation
    numpy_data = np.array(data)
    labels = np.array(data.head(1))

    for data_class in classes:

        # index_list is a list of numpy array int64 type
        index_list.append(np.where(labels == data_class)[1])

    for one_class in index_list:
        train_list.extend(one_class[0:train_num])
        test_list.extend(one_class[train_num:])

    print(train_list)
    print(test_list)

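    # samples are stored column-wise, so the train/test sets are selected by column index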
    train = np.array(numpy_data[:, train_list])
    test = np.array(numpy_data[:, test_list])

    print(train.shape, test.shape)

    np.savetxt(train_filename, train, delimiter=',', fmt='%i')
    np.savetxt(test_filename, test, delimiter=',', fmt='%i')

    # Call programs
    # Knn
    print("KNN classifier")
    obj = KnnClassification.KnnClassification(10, train_filename,
                                              test_filename)
    obj.train()

    # Centroid method
    print("Centroid classifier")
    obj = CentroidMethod.CentroidMethod(train_filename, test_filename)
    obj.pre_process()
    obj.train()

    # Linear Regression
    print("Linear regression")
    obj = LinearRegression.LinearRegression(train_filename, test_filename)
    obj.compute_coefficients()

    # SVM
    print("SVM classifier")
    obj = Svm.Svm(train_filename, test_filename)
    obj.train()
Example #8
def isConstant(y, periods, alpha):
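    # the series counts as constant only if it passes the white-noise,
    # linear-trend, and runs tests below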
    if y.var() == 0:
        return True;

    p1 = [DataHelper.testWhiteNoise(y - y.mean(), m) for m in periods];
    if np.any(np.mat(p1) <= alpha):
        return False;

    p2 = LinearRegression.LinearRegression().fit(np.mat(range(0, y.shape[0])).T, y).betaP;
    if p2[1, 0] <= alpha:
        return False;

    p3 = DataHelper.testRunsLeft((y > np.quantile(y, 0.5)) - 0);
    if p3 <= alpha:
        return False;

    print("{0}, {1}, {2}".format(p1, p2.T, p3));
    return True;
Example #9
def main():
    # load dataset
    data = np.genfromtxt("../datasets/mdataset.csv", delimiter=",")

    # create model
    linreg = LinearRegression(data, 2, 'test')
    linreg.describeModel()

    # train the model
    epochs = 60
    linreg.training(epochs, 0.001)


    # plot data with the fitted result lines
    plt.figure(1)


    axis1 = [min(data[:,0]), max(data[:,0]), min(data[:,2]), max(data[:,2])]
    axis2 = [min(data[:,1]), max(data[:,1]), min(data[:,2]), max(data[:,2])]
    axis = [min([axis1[0], axis2[0]]), max([axis1[1], axis2[1]]), 
            min([axis1[2], axis2[2]]), max([axis1[3], axis2[3]])]

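    # the model takes two inputs, so build rows [1, x, x] to evaluate the fit
    # along a single shared axis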
    setx = np.asmatrix(np.linspace(axis[0], axis[1])).T
    x0 = np.ones((setx.size, 1)) 
    x = np.concatenate((x0, setx, setx), axis=1)

    plt.scatter(data[:,0], data[:,2])
    plt.scatter(data[:,1], data[:,2])
    plt.plot(setx, linreg.modelFunction(x))
    plt.axis(axis)

    #plt.subplot(212)
    
    #setx = np.linspace(axis[0], axis[1])
    
    #plt.plot(setx, linreg.modelFunction(setx))
    #plt.axis(axis)

    plt.show()
Example #10
def testAmplitude():
    startIndex, endIndex = 2855, 2880;
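    # NOTE: the sample values were elided from this snippet; populate `data` before running.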
    data = [];
    y = np.mat(data).T;

    h, M = 24, 3;
    X = np.mat(np.arange(y.shape[0])).T;
    # m = LinearRegression.LinearRegression().fit(X, y, [LinearRegression.RegressionSplineFunction(h + M - 2, M)]);
    m = LinearRegression.LinearRegression().fit(X, y, [LinearRegression.RegressionSplineFunction(int(h * 60 / 60) + M - 2, M)]);
    yHeat = m.predictValue(X);

    amplitude = y[startIndex: endIndex, :] - yHeat[startIndex: endIndex, :];
    print(amplitude.A.flatten().tolist());

    plt.figure(1, (12, 8));
    plt.get_current_fig_manager().window.maximize();
    plt.subplot(211);
    plt.plot(X.A.flatten(), y.A.flatten(), "-xb");
    plt.plot(X.A.flatten(), yHeat.A.flatten(), "-r");
    plt.subplot(212);
    plt.plot(X[startIndex: endIndex, :].A.flatten(), y[startIndex: endIndex, :].A.flatten(), "-xb");
    plt.plot(X[startIndex: endIndex, :].A.flatten(), yHeat[startIndex: endIndex, :].A.flatten(), "-r");
    plt.show(block=True);
    plt.close();
Example #11
def detectSpeed(j, f):
    print("speed {0} started".format(j));

    M = 2;
    h = 1;
    timespan = 6;
    size, speed = int(3600 / f), [];
    # data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/Realtime_30/__JNRTDB_YCH_LIC6205.PV.npy")).T;
    data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/data.npy".format(f)));
    marks = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/marks.npy".format(f)));
    # y1 = data[:size * (h + 0), j];
    # X1 = np.mat(np.arange(y1.shape[0])).T;
    # knots = findKnots2(y1.A.flatten());
    # f1 = LinearRegression.RegressionSplineFunction(int((h + 0) * 60 / timespan) + M - 2, M, knots);
    # m1 = LinearRegression.LinearRegression().fit(X1, y1, [f1]);
    # sY1 = m1.predictValue(X1);
    # X1 = X1[:, :];
    # y1 = y1[:, :];
    # sY1 = sY1[:, :];
    # speed.extend(getSpeedM2(m1.beta, f1.knots, X1).A.flatten().tolist());

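    # fit an h-hour spline per block and cache the resulting speeds; later runs
    # reuse the saved .npy file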
    if not os.path.isfile(f"{f}/speed_{j}_speed.npy"):
        totalCount = math.floor((data.shape[0] - 0) / (size * h));

        for i in range(0, totalCount):
            y2 = data[i * size * h - 0:(i + 1) * size * h + 0, j];
            X2 = np.mat(np.arange(y2.shape[0])).T;
            knots = findKnots3(y2.A.flatten());
            f2 = LinearRegression.RegressionSplineFunction(int((h + 0) * 60 / timespan) + M - 2, M, knots);
            m2 = LinearRegression.LinearRegression().fit(X2, y2, [f2]);
            sY2 = m2.predictValue(X2);
            X2 = X2[:, :];
            y2 = y2[:, :];
            sY2 = sY2[:, :];
            speed.extend(getSpeedM2(m2.beta, f2.knots, X2).A.flatten().tolist());

            # plt.figure(1, (12, 8));
            # # plt.get_current_fig_manager().window.showMaximized();
            # plt.subplot(111);
            # plt.title(f"{i}, {m2.r2}");
            # plt.plot(X2.A.flatten(), y2.A.flatten(), "-xk");
            # plt.plot(X2.A.flatten(), sY2.A.flatten(), "-or");
            # for x in f2.knots:
            #     plt.axvline(x, color = "b");
            # # plt.scatter(f1.knots, [y1.mean()] * len(f1.knots), marker="*", color = "b");
            # # plt.subplot(212);
            # # plt.title(str(i));
            # # plt.plot(X2.A.flatten(), y2.A.flatten(), "-x");
            # # plt.plot(X2.A.flatten(), sY2.A.flatten(), color = "red");
            # plt.show(block = True);
            # plt.savefig(f"/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/speed_images_history_YCH_LIC6206.PV/{i}.png");
            # print(f"{i}/{totalCount} saved.");
            # plt.close();

            # X1, y1, sY1, f1 = X2, y2, sY2, f2;

        print("speed history completed.");
        speed = np.array(speed);
        np.save(f"{f}/speed_{j}_speed.npy", speed);
    else:
        speed = np.load(f"{f}/speed_{j}_speed.npy");

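    # count samples lying more than six standard deviations from the mean speed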
    speedMean, speedStd = speed.mean(), speed.std();
    print(np.logical_or((speed - speedMean) / speedStd < -6, (speed - speedMean) / speedStd > 6).sum());

    plt.figure(1, (12, 8));
    plt.get_current_fig_manager().window.showMaximized();
    plt.hist(speed, bins = 1000);
    for x in [speedMean, speedMean - 6 * speedStd, speedMean + 6 * speedStd]:
        plt.axvline(x, color = "b");
    plt.show(block = True);
    plt.close();

    deltaValues = np.diff(data[:, j], 1, 0);
    deltaMean, deltaStd = deltaValues.mean(), deltaValues.std();
    print(np.logical_or((deltaValues - deltaMean) / deltaStd < -6, (deltaValues - deltaMean) / deltaStd > 6).sum());

    plt.figure(1, (12, 8));
    plt.get_current_fig_manager().window.showMaximized();
    plt.hist(deltaValues.A.flatten(), bins = 1000);
    for x in [deltaMean, deltaMean - 6 * deltaStd, deltaMean + 6 * deltaStd]:
        plt.axvline(x, color = "b");
    plt.show(block = True);
    plt.close();

    indices1 = np.argwhere(speed < (speedMean - 6 * speedStd))[:, 0].flatten().tolist() + np.argwhere(speed > (speedMean + 6 * speedStd))[:, 0].flatten().tolist();
    indices1.sort();
    # showAnomaly(indices1, j, size, data, marks);

    # h = 1;
    # startIndex, offset, values = size * h, int(12 * 60 / f), None;
    # if not os.path.isfile("{0}/speed_{1}_values.npy".format(f, j)):
    #     ftn = LinearRegression.RegressionSplineFunction(int(h * 60 / timespan) + M - 2, M);
    #     X = ftn.getX(np.mat(np.arange(size * h)).T);
    #     x = np.mat([size * h - 1 - offset]);
    #
    #     with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
    #         if M == 3:
    #             T = np.multiply(np.hstack(tuple([x - k for k in ftn.knots])), np.hstack(tuple([(x > k) - 0 for k in ftn.knots])));
    #
    #             # values = [calcSpeedM3(i, j, offset, size, h, data, X, x, T) for i in range(startIndex, size * 24 * 10)];
    #             # showDiff(speed[startIndex: startIndex + len(values)].A.flatten().tolist(), values, size * 6);
    #
    #             values = pool.starmap(calcSpeedM3, [(i, j, offset, size, h, data, X, x, T) for i in range(startIndex, data.shape[0] - offset)]);
    #         else:
    #             T = np.hstack(tuple([(x > k) - 0 for k in ftn.knots]));
    #
    #             # values = [calcSpeedM2(i, j, offset, size, h, data, X, T) for i in range(startIndex, size * 24 * 10)];
    #             # showDiff(speed[startIndex: startIndex + len(values)].A.flatten().tolist(), values, size * 6);
    #
    #             values = pool.starmap(calcSpeedM2, [(i, j, offset, size, h, data, X, T) for i in range(startIndex, data.shape[0] - offset)]);
    #     np.save("{0}/speed_{1}_values.npy".format(f, j), np.mat(values).T);
    #     print("realtime speed completed.");
    #
    # values = np.load(f"{f}/speed_{j}_values.npy");
    # valuesMean, valuesStd = values.mean(), values.std();
    # plt.figure(1, (12, 8));
    # plt.get_current_fig_manager().window.showMaximized();
    # plt.hist(values, bins = 1000);
    # plt.show(block = True);
    # plt.close();

    # indices2 = (np.argwhere(values < (speedMean - 6 * speedStd))[:, 0].flatten() + startIndex).tolist() + (np.argwhere(values > (speedMean + 6 * speedStd))[:, 0].flatten() + startIndex).tolist();
    # indices2.sort();
    # showAnomaly(indices2, j, size, data, marks);

    forest = None;
    if not os.path.isfile("{0}/speed_{1}_forest.npy".format(f, j)):
        dataSet = np.mat(speed).T;
        forest = IsolationForest(200, 2 ** 9, CurvesThresholdFinder(0.65, 0.68, 0.73, False));
        forest.fill(dataSet);
        print("forest fill completed");
        forest.train(dataSet);
        print("forest train completed");

        with open("{0}/speed_{1}_forest.npy".format(f, j), "wb") as file:
            pickle.dump(forest, file, protocol = pickle.DEFAULT_PROTOCOL);
    else:
        with open("{0}/speed_{1}_forest.npy".format(f, j), "rb") as file:
            forest = pickle.load(file);

    # scores = None;
    # if not os.path.isfile("{0}/speed_{1}_scores.npy".format(f, j)):
    #     with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
    #         scores = pool.map(forest.getAnomalyScore, [np.mat([v]) for v in values.A.flatten().tolist()]);
    #     np.save("{0}/speed_{1}_scores.npy".format(f, j), np.mat(scores).T);
    #     print("realtime score completed.");
    #
    # scores = np.mat(np.load("{0}/speed_{1}_scores.npy".format(f, j)));
    # plt.figure(1, (12, 8));
    # plt.get_current_fig_manager().window.showMaximized();
    # plt.hist(scores.A.flatten(), bins = 1000);
    # plt.show(block = True);
    # plt.close();

    scores = np.array(forest.scores);
    indices3 = np.argwhere(scores >= forest.threshold)[:, 0].flatten().tolist();
    indices3.sort();
    # showAnomaly(indices3, j, size, data, marks);

    # indices4 = (np.argwhere(values < (speedMean - 3 * speedStd))[:, 0].flatten()).tolist() + (np.argwhere(values > (speedMean + 3 * speedStd))[:, 0].flatten()).tolist();
    # indices4 = [i + startIndex for i in indices4 if values[i, 0] < speedMean - 6 * speedStd or values[i, 0] > speedMean + 6 * speedStd or scores[i] >= forest.threshold];
    # indices4.sort();
    # showAnomaly(indices4, j, size, data, marks);

    # deltaScores = None;
    # if not os.path.isfile("{0}/speed_{1}_delta_scores.npy".format(f, j)):
    #     with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
    #         deltaScores = pool.map(forest.getAnomalyScore, [np.mat([v]) for v in deltaValues.A.flatten().tolist()]);
    #     np.save("{0}/speed_{1}_delta_scores.npy".format(f, j), np.mat(deltaScores).T);
    #
    # deltaScores = np.mat(np.load("{0}/speed_{1}_delta_scores.npy".format(f, j)));

    # indices5 = [i + 1 for i in range(0, deltaValues.shape[0]) if deltaValues[i, 0] < deltaMean - 6 * deltaStd or deltaValues[i, 0] > deltaMean + 6 * deltaStd];
    indices5 = np.argwhere(deltaValues < (deltaMean - 6 * deltaStd))[:, 0].flatten().tolist() + np.argwhere(deltaValues > (deltaMean + 6 * deltaStd))[:, 0].flatten().tolist();
    indices5 = [i + 1 for i in indices5];
    indices5.sort();
    # showAnomaly(indices5, j, size, data, marks);
    # showAnomaly2(indices4, indices5, j, size, data, marks);

    print("speed {0} completed".format(j));
Example #12
def detectAmplitude(j, f):
    print("amplitude {0} started".format(j));

    M = 3;
    h = 24;
    size, sY = int(3600 / f),  [];
    data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/data.npy".format(f)));
    marks = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/marks.npy".format(f)));
    # y1 = data[:size * (h + 1), j];
    # X1 = np.mat(np.arange(y1.shape[0])).T;
    # m1 = LinearRegression.LinearRegression().fit(X1, y1, [LinearRegression.RegressionSplineFunction((h + 1) + M - 2, M)]);
    # sY1 = m1.predictValue(X1);
    # X1 = X1[:-size, :];
    # y1 = y1[:-size, :];
    # sY1 = sY1[:-size, :];
    # sY.extend(sY1.A.flatten().tolist());

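    # fit a spline per h-hour block, collect the fitted values, and cache the
    # residual amplitudes; later runs reuse the saved .npy file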
    if not os.path.isfile(f"{f}/amplitude_{j}_amplitude.npy"):
        totalCount = math.floor((data.shape[0] - 0) / (size * h));

        for i in range(0, totalCount):
            y2 = data[i * size * h - 0:(i + 1) * size * h + 0, j];
            X2 = np.mat(np.arange(y2.shape[0])).T;
            m2 = LinearRegression.LinearRegression().fit(X2, y2, [LinearRegression.RegressionSplineFunction(h + M - 2, M)]);
            sY2 = m2.predictValue(X2);
            X2 = X2[:, :];
            y2 = y2[:, :];
            sY2 = sY2[:, :];
            sY.extend(sY2.A.flatten().tolist());

            # plt.figure(1, (12, 8));
            # # plt.get_current_fig_manager().window.showMaximized();
            # plt.subplot(111);
            # plt.title(f"{i}, {m2.r2}");
            # plt.plot(X2.A.flatten(), y2.A.flatten(), "-xk");
            # plt.plot(X2.A.flatten(), sY2.A.flatten(), "-or");
            # for x in f2.knots:
            #     plt.axvline(x, color = "b");
            # # plt.scatter(f1.knots, [y1.mean()] * len(f1.knots), marker="*", color = "b");
            # # plt.subplot(212);
            # # plt.title(str(i));
            # # plt.plot(X2.A.flatten(), y2.A.flatten(), "-x");
            # # plt.plot(X2.A.flatten(), sY2.A.flatten(), color = "red");
            # plt.show(block = True);
            # plt.savefig(f"/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/amplitude_images_history_YCH_FI6221.PV/{i}.png");
            # print(f"{i}/{totalCount} saved.");
            # plt.close();

            # X1, y1, sY1, f1 = X2, y2, sY2, f2;

        print("amplitude history completed.");
        amplitude = data[: len(sY), j].A.flatten() - np.array(sY);
        np.save(f"{f}/amplitude_{j}_amplitude.npy", amplitude);
    else:
        amplitude = np.load(f"{f}/amplitude_{j}_amplitude.npy");

    amplitudeMean, amplitudeStd = amplitude.mean(), amplitude.std();
    print(DataHelper.testNormalDistribution(amplitude));
    # plt.figure(1, (12, 8));
    # plt.get_current_fig_manager().window.showMaximized();
    # plt.hist(amplitude, bins = 1000);
    # plt.show(block = True);
    # plt.close();

    indices1 = np.argwhere(amplitude < (amplitudeMean - 6 * amplitudeStd))[:, 0].flatten().tolist() + np.argwhere(amplitude > (amplitudeMean + 6 * amplitudeStd))[:, 0].flatten().tolist();
    indices1.sort();
    showAnomaly(indices1, j, size, data, marks);

    h, m = 24, 12; # 24 hours, 12 minutes
    startIndex, offset, values = size * h, int(m * 60 / f), None;
    if not os.path.isfile(f"{f}/amplitude_{j}_values.npy"):
        with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
            values = pool.starmap(calcAmplitude, [(i, j, offset, size, h, M, data) for i in range(startIndex, data.shape[0] - offset)]);
        np.save("{0}/amplitude_{1}_values.npy".format(f, j), np.array(values));
    else:
        values = np.load(f"{f}/amplitude_{j}_values.npy");

    # plt.figure(1, (12, 8));
    # plt.get_current_fig_manager().window.showMaximized();
    # plt.hist(values, bins = 1000);
    # plt.show(block = True);
    # plt.close();

    indices2 = (np.argwhere(values < (amplitudeMean - 6 * amplitudeStd))[:, 0] + startIndex).tolist() + (np.argwhere(values > (amplitudeMean + 6 * amplitudeStd))[:, 0] + startIndex).tolist();
    indices2.sort();
    showAnomaly(indices2, j, size, data, marks);

    # forest = None;
    # if not os.path.isfile("{0}/amplitude_{1}_forest.npy".format(f, j)):
    #     forest = IsolationForest(200, 2 ** 9, CurvesThresholdFinder(0.65, 0.68, 0.73, False));
    #     forest.fill(amplitude);
    #     print("forest fill completed");
    #     forest.train(amplitude);
    #     print("forest train completed");
    #
    #     with open("{0}/amplitude_{1}_forest.npy".format(f, j), "wb") as file:
    #         pickle.dump(forest, file, protocol = pickle.DEFAULT_PROTOCOL);
    # else:
    #     with open("{0}/amplitude_{1}_forest.npy".format(f, j), "rb") as file:
    #         forest = pickle.load(file);
    #
    # scores = None;
    # if not os.path.isfile("{0}/amplitude_{1}_scores.npy".format(f, j)):
    #     with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
    #         scores = pool.map(forest.getAnomalyScore, [np.mat([v]) for v in values.A.flatten().tolist()]);
    #     np.save("{0}/amplitude_{1}_scores.npy".format(f, j), np.mat(scores).T);
    #
    # scores = np.mat(np.load("{0}/amplitude_{1}_scores.npy".format(f, j)));
    # plt.figure(1, (12, 8));
    # plt.get_current_fig_manager().window.maximize();
    # plt.hist(scores.A.flatten(), bins = 1000);
    # plt.show(block = True);
    # plt.close();

    # indices3 = (np.argwhere(scores >= forest.threshold)[:, 0].flatten() + startIndex).tolist();
    # indices3.sort();
    # showAnomaly(indices3, j, size, data, marks);

    # indices4 = (np.argwhere(values < (amplitudeMean - 3 * amplitudeStd))[:, 0].flatten()).tolist() + (np.argwhere(values > (amplitudeMean + 3 * amplitudeStd))[:, 0].flatten()).tolist();
    # indices4 = [i + startIndex for i in indices4 if values[i, 0] < amplitudeMean - 6 * amplitudeStd or values[i, 0] > amplitudeMean + 6 * amplitudeStd or scores[i] >= forest.threshold];
    # indices4.sort();
    # showAnomaly(indices4, j, size, data, marks);

    print("amplitude {0} completed".format(j));
Example #13
def calcSpeedM3(i, j, offset, size, h, data, X, x, T):
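    # fit the h-hour window ending at sample i + offset, then evaluate the
    # spline derivative at the precomputed point x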
    y = data[i + 1 + offset - size * h:i + 1 + offset, j];
    m = LinearRegression.LinearRegression().fit(X, y);
    return getSpeedM3Internal(m.beta, x, T)[0, 0];
Example #14
import LinearRegression
from preprosessing import *
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression as LR
import numpy as np

if __name__ == "__main__":
    data = load_boston()
    X = mean_norm(data['data'])  # Normalizes the data set
    y = data['target']
    X_train, y_train, X_test, y_test = split_data(X, y)
    model = LinearRegression.LinearRegression()
    theta, cost = model.gradient_descent(X_train, y_train)
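    # note: X_test is normalized with its own statistics here, not the training set's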
    print(mean_norm(X_test).dot(theta))

    model1 = LR()
    model1.fit(X_train, y_train)
    print(model1.predict(mean_norm(X_test)))
Example #15
def calcAmplitude(i, j, offset, size, h, M, data):
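    # fit an h-hour spline to the window ending at sample i + offset and return
    # the residual (observed minus fitted) at sample i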
    X = np.mat(np.arange(size * h)).T;
    y = data[i + 1 + offset - size * h: i + 1 + offset, j];
    model = LinearRegression.LinearRegression().fit(X, y, [LinearRegression.RegressionSplineFunction(h + M - 2, M)]);
    return data[i, j] - model.predictValue(np.mat([size * h - 1 - offset]))[0, 0];
Example #16
from pyspark.ml.linalg import Vectors
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression

assembler = VectorAssembler(inputCols=[inputCol1, inputCol2, inputCol3, inputCol4], outputCol='features')

# The output DataFrame has all the columns in the data set plus an additional
# 'features' column, which is a vector of all the input columns we gave
output = assembler.transform(Indexed)
final_data = output.select('features', 'dependentVariableName')

# Splitting the data into training and test sets
train_data, test_data = final_data.randomSplit([0.7, 0.3])
train_data.show()
test_data.describe().show()

# Building the linear regression model with 'features' as input
lr = LinearRegression(featuresCol='features', labelCol='<outputColName>', predictionCol='prediction')
lrModel = lr.fit(train_data)

# Evaluate how the model performed on test data
test_results = lrModel.evaluate(test_data)
test_results.residuals.show()
test_results.rootMeanSquaredError

# Model performance parameter: R-squared
test_results.r2

# Check what the predictions will be on data that doesn't have a label value
unlabeled_data = test_data.select('features')
predictions = lrModel.transform(unlabeled_data)
predictions.show()
Example #17
            data_frame, 39, 9)
        centroid_data_frame_train = deepcopy(train_data_with_labels)
        centroid_data_frame_test = deepcopy(test_data_with_labels)
        # make_file_and_save_data_train = Task_E.store(train_data_set_without_labels.T, train_y, 'jenil_train.csv')
        # make_file_and_save_data_test = Task_E.store(test_data_set_without_labels.T, test_y, 'jenil_test.csv')
        k = 5
        knn_object = Knn(k)
        data_with_euclidean_distance = knn_object.calculate_distance(
            train_data_with_labels.values, test_data_with_labels.values)
        accuracy = knn_object.get_accuracy([
            (record['Test Label'], record['Classification'])
            for record in data_with_euclidean_distance
        ])
        print('Accuracy of Knn is:', accuracy)
        # Linear Regression
        linear_regression_object = LinearRegression.LinearRegression()
        N_train, L_train, Xtrain = len(train_y), train_y, train_data_set_without_labels.T
        N_test, Ytest, Xtest = len(test_y), test_y, test_data_set_without_labels.T

        Ytrain = linear_regression_object.indicator_matrix(L_train)
        linear_regression_object.accuracy(N_train, N_test, Xtrain, Xtest,
                                          Ytrain, Ytest)

        # SVM
        svm_object = Svm.SupportVectorMachine()
        svm_object.find_accuracy(train_data_set_without_labels, train_y,
                                 test_data_set_without_labels, test_y)
Example #18
x = 'area_mean'
sns.lmplot(x=x, y='Target', data=df, ci=None)
plt.ylim([-0.5, 1.5])
plt.xlim([df[x].min()- df[x].std(), df[x].max() + df[x].std()])
plt.show()


from sklearn.linear_model import LinearRegression
linreg = LinearRegression().fit(df.area_mean.values.reshape(-1, 1), df.Target)
# compute predictions for area_mean = 5 and area_mean = 350 using the predict method
linreg.predict(np.array([[5], [350]]))

df['Pred_class'] = df.Prediction.map(lambda x: 1 if x > 0.05 else 0)

# fit a logistic regression model and store the class predictions
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(C=1e9, solver='lbfgs')
feature_cols = ['area_mean']
X = df[feature_cols]
y = df.Target
logreg.fit(X, y)
df['Log_Prediction'] = logreg.predict(X)
df['Log_probabilities'] = logreg.predict_proba(X)[:,1]

(df.Pred_class != df.Log_Prediction).sum()
Example #19
	dataMat = mat(dataMat); labelMat = mat(labelMat)
	labelMat = labelMat.T
	sum_error_train = 0; sum_error = 0; sum_rr = 0
	for j in range(10):
		z = list(range(27))
		random.shuffle(z)
		x = zeros([20, 5]); y = zeros([20, 1]); newx = zeros([7, 5]); newy = zeros([7, 1])
		x = mat(x); y = mat(y); newx = mat(newx); newy = mat(newy)
		for i in range(20):
			x[i] = dataMat[z[i]]
			y[i] = labelMat[z[i]]
		for i in range(20, 27):
			newx[i-20] = dataMat[z[i]]
			newy[i-20] = labelMat[z[i]]
		# call the linear regression routine to get the train/test errors and R-squared
		error_train, error, rr = LinearRegression(x, y, newx, newy)
		sum_error_train += error_train; sum_error += error; sum_rr += rr
	error_train = sum_error_train / 10; error = sum_error / 10; rr = sum_rr / 10
	print('training-set MSE =', error_train)
	print('validation-set MSE =', error)
	print('R-squared =', rr)

elif a == '2':
	# n rounds of k-fold cross-validation
	dataMat, labelMat = loadDataSet('10.txt')
	dataMat = mat(dataMat); labelMat = mat(labelMat)
	labelMat = labelMat.T
	sum_error_train = 0; sum_error = 0; sum_rr = 0
	for j in range(10):
		z = list(range(27))
		random.shuffle(z)
Example #20
from sklearn.linear_model import LinearRegression, Ridge

reg = Ridge()
reg = LinearRegression() # making an object

reg.fit(nd1,nd2)
reg.predict(nd1)

#utils
shuffle

#preprocessing
LabelEncoder, OrdinalEncoder , label_binarize
PolynomialFeatures
MinMaxScaler, StandardScaler, RobustScaler # scale

#model_selection
train_test_split, cross_val_score, cross_validate, learning_curve  # one split; CV scores only; CV scores plus extra info; cross_validate over different train sizes
GridSearchCV , RandomizedSearchCV , validation_curve # on params


#metrics
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_squared_error, f1_score, accuracy_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix,  classification_report #cnfmatrix = confusion_matrix(Y,predicted_Y)
from sklearn.metrics import precision_recall_curve , roc_curve , roc_auc_score



############################################################
Example #21
import numpy as np
import pandas as pd
import LinearRegression as lr
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

from sklearn.datasets import load_boston

dataset = load_boston()
X = dataset.data
y = dataset.target
print(f"This dataset contains {X.shape[0]} entries and {X.shape[1]} features")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

my_regressor = lr.LinearRegression(X_train, y_train).fit()
sklearn_regressor = LinearRegression().fit(X_train, y_train)

my_train_accuracy = my_regressor.score()
sklearn_train_accuracy = sklearn_regressor.score(X_train, y_train)

my_test_accuracy = my_regressor.score(X_test, y_test)
sklearn_test_accuracy = sklearn_regressor.score(X_test, y_test)

result = pd.DataFrame([[my_train_accuracy, sklearn_train_accuracy],
                       [my_test_accuracy, sklearn_test_accuracy]],
                      ['Training Acc.', 'Test Acc.'], ['Ours', "Sklearn's"])

print(result)
Example #22
from sklearn.linear_model import LinearRegression

data = pd.read_csv( path )

x = data[["col1","col2"]] # input 
y = data["col3"]          # target


    # check shapes
print(x.shape, y.shape)  # if they are compatible we can proceed
if x.shape != y.shape:
    x = x.values.reshape(a, b)  # a, b, c, d are placeholders left unspecified in the original
    y = y.values.reshape(c, d)

reg = LinearRegression() # initialize the model
reg.fit(x,y)


    # model performance parameter
R_squared = reg.score(x,y)
coefficents = reg.coef_
intercept = reg.intercept_



    # prediction
x_value = 155.00
reg.predict([[x_value] * x.shape[1]])  # predict expects a 2-D array with one value per feature

Example #23
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression

import LinearRegression as us

plt.title("Toy Dataset")
X1, Y1 = make_regression(n_samples=100000,
                         n_features=100,
                         n_informative=80,
                         n_targets=1,
                         noise=0.5)

X1_train = X1[:-10000]
Y1_train = Y1[:-10000]

X1_test = X1[-10000:]
Y1_test = Y1[-10000:]
our_rg = us.LinearRegression()
our_rg.fit(X1_train, Y1_train)
our_Y1_pred = our_rg.predict(X1_test)
our_Y1_fit = our_rg.predict(X1_train)


def count_predict_Loss(y, target):
    total = 0
    for i in range(len(y)):
        total += (y[i] - target[i])**2
    return total / len(y)


train_loss = count_predict_Loss(our_Y1_fit, Y1_train)
test_loss = count_predict_Loss(our_Y1_pred, Y1_test)

print("train_loss:   ", train_loss)
Example #24
from LinearRegression import LinearRegression

if __name__ == "__main__":
    inputFileName = "Input/auto_mpg.csv"
    outputDirectory = "Output/"
    numInstances = 398
    numAttributes = 8

    linearRegression = LinearRegression(inputFileName, outputDirectory,
                                        numInstances, numAttributes)
    linearRegression.process()
Example #25
## Linear regression test code

import numpy as np
from sklearn import datasets

boston_X, boston_y = datasets.load_boston(return_X_y=True)
boston_X = boston_X[:, np.newaxis, 5]
X_train = boston_X[:-20]
X_test = boston_X[-20:]
y_train = boston_y[:-20]
y_test = boston_y[-20:]

from LinearRegression import *

lin = LinearRegression()
lin.buildModel(X_train, y_train)
lin.evaluateModel(X_test, y_test)
lin.predictValue(5)

###########################################
## KNN test code

from sklearn.datasets import load_iris
from sklearn.utils import shuffle

iris_X, iris_y = load_iris(return_X_y=True)
iris_X, iris_y = shuffle(iris_X, iris_y)
X_train = iris_X[:-30]
X_test = iris_X[-30:]
y_train = iris_y[:-30]
y_test = iris_y[-30:]
Example #26
def PlotData(lastPxx, UnLoadPxx1, UnLoadTime1, WriteFolderName, WriteFileNameEnd):
	X             =  timeStep*1e-6*np.array(UnLoadTime1)
	UnLoadTime1   =  timeStep*1e-6*np.array(UnLoadTime1)
	UnLoadPxx1    =  np.array(UnLoadPxx1)
	Y             =  np.log(UnLoadPxx1)

	# print(Y-UnLoadPxx1)
	# plt.scatter(X, Y, color='b', label='log(pxx)')
	# plt.scatter(X, UnLoadPxx1, color='r', label='pxx')
	# plt.legend()
	# plt.show()
	# plt.close()
	# print(X.shape)
	# print(Y.shape)
	# print(X)
	# print(Y)

	X.shape     =  (X.shape[0], 1)
	X1          =  X
	X1          =  np.hstack(( np.ones((X1.shape[0],1)), X1 ))


	Iteration, Cost, Theta = LinReg.LinearRegression(X1, Y, learnRate, thisLambda)

	plt.plot(Iteration[2:], Cost[2:], label='cost')
	plt.legend()
	plt.show()
	plt.close()


	print(Theta)

	H = X1.dot(Theta)

	for i in range(X.shape[1]):
		plt.scatter(X[:, i], Y, color='b', label='target')
		plt.scatter(X[:, i], H, color='r', label='fit')
		plt.legend()
		plt.show()
		plt.close()


	# # realTheta0  =  random.randint(1,1000)
	# # # realTheta1  =  random.randint(1,5)
	# # realTheta1  =  random.uniform(1.0, 5.0)
	# # X2          =  np.linspace(0, 12, 100)
	# # Y2          =  realTheta0*np.exp(-realTheta1*X2)
	# # Y21         =  np.log(Y2)

	# # X2.shape     =  (X2.shape[0], 1)
	# # # X21          =  (X2 - np.mean(X2))/np.std(X2)
	# # X21          =  X2
	# # X21          =  np.hstack(( np.ones((X21.shape[0],1)), X21 ))


	# # Iteration, Cost, Theta = LinReg.LinearRegression (X21, Y21, learnRate, thisLambda)
	# # plt.plot(Iteration, Cost, label='cost')
	# # plt.legend()
	# # plt.show()
	# # plt.close()


	# # print(realTheta0, realTheta1, Theta)
	# # # print(realTheta0, realTheta1, np.exp(Theta[0]), Theta)


	# # plt.scatter(X2, Y21, color='b', label='target')
	# # plt.scatter(X2, X21.dot(Theta), color='r', label='fit')
	# # # plt.scatter(X2, np.log(realTheta0*np.exp(Theta[1]*X2)), color='g', label='fit+')
	# # plt.legend()
	# # plt.show()
	# # plt.close()

	# X             =  np.linspace(0, fit1, 100)
	# X0            =  np.linspace(0, stopTimeInNs, 100)
	# X1            =  np.linspace(fit1, stopTimeInNs, 100)
	# X2            =  np.linspace(fit2, stopTimeInNs, 100)
	# X3            =  np.linspace(fit3, stopTimeInNs, 100)
	# FitPxx        =  lastPxx*np.exp(-X*Fit[countChosen, countStrain])


	# fig           =  plt.figure(1, figsize=(3.5, 3.5))

	# plt.subplot(111)
	# plt.plot(UnLoadTime1, UnLoadPxx1, 'o', linewidth=2, label='data')
	# plt.plot(X, FitPxx, linewidth=2, label='fit:' +  str(Fit[countChosen, countStrain]))
	
	# plt.xlabel('Time (ns)')
	# plt.ylabel('Stress (MPa)')
	# plt.legend(bbox_to_anchor=(0., 0.98, 1., .104), loc=3, ncol=3, mode="expand", borderaxespad=0., fontsize = 'xx-small')
	# plt.xticks(rotation=45)
	# plt.tight_layout()
	# plt.savefig(os.path.join(WriteFolderName, "Fit" + WriteFileNameEnd + ".png"))
	# plt.show()
	# plt.close(fig)


	UnLoadTime1               = []
	UnLoadStrain1             = []
	UnLoadPxx1                = []
Example #27
import LinearRegression
import numpy as np

X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# Y = 0 + 1X
Y = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

model = LinearRegression.LinearRegression()

model.train(X, Y)

print(model.predict(14))
Example #28
import timeit

import numpy as np
import LinearRegression as lr


def generate_data(total_features, slope, randomness, y_range):
    # NOTE: the original snippet begins mid-function; this signature and the
    # construction of y are reconstructed from the call site below.
    y = np.arange(*y_range)
    random_data = np.random.rand(len(y), total_features)

    # Update each feature with slope and randomness
    random_data[:, 0] = np.ones(len(y))
    for i in range(0, total_features - 1):
        random_data[:, i + 1] += (y * slope[i]) + np.random.uniform(
            -randomness[i], randomness[i], (len(y)))

    return (random_data[:, 0:total_features], y)


# Generate random data
x, y = generate_data(3, [-0.5, 0.2], [0.3, 1], (0, 10, 0.1))

# First model use gradient descent
linear_regression = lr.LinearRegression(3)
linear_regression.batch_size = 25
linear_regression.total_epochs = 40

# Second model use normal equation
linear_regression_1 = lr.LinearRegression(3)
linear_regression_1.weight = np.copy(linear_regression.weight)

print(f'---Gradient Descent---')
print(f'Initial cost : {linear_regression.cost(x, y)}')

# Start gradient Descent on first model
start = timeit.default_timer()
linear_regression.gradient_descent(x, y)
linear_regression.gradient_descent(x, y)
taken = (timeit.default_timer() - start)
Example #29
# nfold, foldSize, m, X, Y come from surrounding context elided from this snippet
trainErr = [0.0] * nfold
testErr = [0.0] * nfold
allIndex = range(0, m)
for i in range(0, nfold):

    testIndex = range((foldSize * i), foldSize * (i + 1))
    trainIndex = list(set(allIndex) - set(testIndex))

    trainX = X[trainIndex, :]
    trainY = Y[trainIndex]
    testX = X[testIndex, :]
    testY = Y[testIndex]

    # set parameter
    alpha = 0.01
    lam = 0.1
    model = LR.LinearRegression(trainX, trainY, alpha, lam)
    model.run(400, printIter=False)

    trainPred = model.predict(trainX)
    trainErr[i] = sum((trainPred - trainY)**2) / len(trainY)

    testPred = model.predict(testX)
    testErr[i] = sum((testPred - testY)**2) / len(testY)

    print "train Err=", trainErr[i], "test Err=", testErr[i]
    print " "

print "summary:"
print "average train err=", numpy.mean(trainErr)
print "average test err=", numpy.mean(testErr)
Example #30
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 29 20:45:43 2018

@author: htshinichi
"""
from matplotlib import pyplot as plt
import LinearRegression
import pandas as pd
import numpy as np

data = pd.read_csv("test_Regression.csv")
X = data.x1
y = data.label
model_linr = LinearRegression.LinearRegression()
model_linr.fit(data)
print(model_linr.weights)
print(model_linr.bias)
line_X = np.arange(X.min(), X.max())[:, np.newaxis]
line_y = model_linr.predict(line_X)

plt.plot(line_X, line_y, color='navy', linewidth=2, label='Linear regressor')
plt.scatter(X, y, color='yellowgreen', marker='.', label='Inliers')
plt.legend(loc='lower right')
plt.xlabel("Input")
plt.ylabel("Response")
plt.show()