def detectChange(j, f):
    """
    Fit regression splines window by window over column ``j`` and plot them.

    The first window covers ``h + 1`` hours and is trimmed by one hour at the
    end; every later window covers ``h + 2`` hours and is trimmed by one hour
    on both sides. For each later window the previous and the current fit are
    shown in a blocking two-panel figure. A histogram of all collected speeds
    is shown at the end.

    :param j: column index of the series inside the loaded data matrix
    :param f: value used both in the data path and as ``int(3600 / f)``
              (presumably the sampling period in seconds -- TODO confirm)
    :return: None
    """
    print("change {0} started".format(j))

    M = 2
    h = 12
    timespan = 6
    size, speed = int(3600 / f), []

    data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/data.npy".format(f)))
    # Loaded but not used further in this function.
    marks = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/marks.npy".format(f)))

    def fitSpline(values, hours):
        # Fit one spline over `values`; returns (positions, basis, model, fitted).
        positions = np.mat(np.arange(values.shape[0])).T
        basis = LinearRegression.RegressionSplineFunction(int(hours * 60 / timespan) + M - 2, M)
        model = LinearRegression.LinearRegression().fit(positions, values, [basis])
        return positions, basis, model, model.predictValue(positions)

    def collectSpeed(model, basis, positions):
        # Pick the speed routine that matches the spline order M.
        if M == 3:
            result = getSpeedM3(model.beta, basis.knots, positions)
        else:
            result = getSpeedM2(model.beta, basis.knots, positions)
        speed.extend(result.A.flatten().tolist())

    def drawPanel(panel, title, positions, observed, fitted):
        # Raw samples with "x" markers, fitted curve in red.
        plt.subplot(panel)
        plt.title(title)
        plt.plot(positions.A.flatten(), observed.A.flatten(), "-x")
        plt.plot(positions.A.flatten(), fitted.A.flatten(), color = "red")

    # First window: h + 1 hours, last hour trimmed after fitting.
    y1 = data[:size * (h + 1), j]
    X1, f1, m1, sY1 = fitSpline(y1, h + 1)
    X1 = X1[:-size, :]
    y1 = y1[:-size, :]
    sY1 = sY1[:-size, :]
    collectSpeed(m1, f1, X1)

    windowCount = math.floor((data.shape[0] - size) / (size * h))
    for i in range(1, windowCount):
        # Later windows: one extra hour on each side, trimmed after fitting.
        y2 = data[i * size * h - size:(i + 1) * size * h + size, j]
        X2, f2, m2, sY2 = fitSpline(y2, h + 2)
        X2 = X2[size:-size, :]
        y2 = y2[size:-size, :]
        sY2 = sY2[size:-size, :]
        collectSpeed(m2, f2, X2)

        plt.figure(1, (12, 8))
        plt.get_current_fig_manager().window.maximize()
        drawPanel(211, str(i - 1), X1, y1, sY1)
        drawPanel(212, str(i), X2, y2, sY2)
        plt.show(block = True)
        plt.close()

        X1, y1, sY1 = X2, y2, sY2

    print("change history completed.")

    speed = np.mat(speed).T
    # Computed as in the original; not used afterwards.
    speedMean, speedStd = speed.mean(), speed.std()

    plt.figure(1, (12, 8))
    plt.get_current_fig_manager().window.maximize()
    plt.hist(speed.A.flatten(), bins = 1000)
    plt.show(block = True)
    plt.close()
def test_Linear_Regression():
    """
    Compare four ways of fitting the linear regression on the Boston data.

    Fits the model with the normal equation, batch gradient descent,
    stochastic gradient descent and mini-batch stochastic gradient descent,
    printing coefficients, intercept, test score and elapsed time for each.

    The elapsed time is the *total* number of microseconds. The original
    used ``timedelta.microseconds``, which is only the sub-second component
    and therefore misreports any run that takes one second or longer.

    :return: None (results are printed)
    """
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    print(X.shape)

    # Keep only samples whose target is below 50.0.
    X = X[y < 50.0]
    y = y[y < 50.0]
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)

    # Gradient descent needs standardized data, otherwise it can overflow.
    scaler = StandardScaler()
    scaler.fit(X_train)
    standard_X_train = scaler.transform(X_train)
    standard_X_test = scaler.transform(X_test)

    def _fit_and_report(banner, fit, X_eval):
        # Time one fitting strategy and print its results.
        print(banner)
        start_time = datetime.datetime.now()
        reg = LinearRegression()
        fit(reg)
        end_time = datetime.datetime.now()
        print(reg.coef_)       # slope
        print(reg.intercept_)  # intercept
        print(reg.score(X_eval, y_test))
        # Floor division by one microsecond yields the total elapsed microseconds.
        elapsed = (end_time - start_time) // datetime.timedelta(microseconds=1)
        print('use time:', elapsed)

    # Normal equation; works on the raw features, no scaling required.
    _fit_and_report(
        '-------------------正规公式计算-----------------------------',
        lambda reg: reg.fit_normal(X_train, y_train),
        X_test)

    # Batch gradient descent.
    _fit_and_report(
        '-------------------批量梯度下降------------------------------',
        lambda reg: reg.fit_gd(standard_X_train, y_train),
        standard_X_test)

    # Stochastic gradient descent.
    _fit_and_report(
        '-------------------随机梯度下降------------------------------',
        lambda reg: reg.fit_sgd(standard_X_train, y_train, n_iters=100),
        standard_X_test)

    # Mini-batch stochastic gradient descent.
    _fit_and_report(
        '------------------小批量随机梯度下降--------------------------',
        lambda reg: reg.fit_msgd(standard_X_train, y_train, batch_size=5),
        standard_X_test)
def getLinearFactors(self, node):
    """
    Fit the linear q_value model of a leaf node and store it on the node.

    Per the original note, this gets the linear factors for Q_h and Q_a and
    sets node.f_linear=[[Q_h factors], [Q_a factors]].

    Training starts from the node's own factors when present, otherwise from
    the parent's factors when present, otherwise from whatever
    ``read_weights()`` provides by default. Up to TRIES attempts are made and
    the attempt with the smallest reported difference is kept.

    :param node: the leaf node whose instances are all used to generate the
                 linear q_value model
    :return: None; the result is written to node.f_linear
    """
    train_X = [instance.currentObs for instance in node.instances]
    train_Y = [instance.qValue for instance in node.instances]

    l_rate = 0.0001
    n_epochs = 1000
    count = 0
    max_diff = 10000

    # Choose the starting factors: own model first, then the parent's.
    tot = None
    if node.f_linear is not None:
        tot = np.transpose(node.f_linear)
        # A node that already has a model gets one attempt fewer.
        count += 1
    elif node.parent and node.parent.f_linear is not None:
        tot = np.transpose(node.parent.f_linear)

    if tot is not None:
        # Row self.n_dim holds the bias; the remaining rows are the weights.
        W = np.delete(tot, self.n_dim, 0)
        b = np.array([tot[self.n_dim]])

    while count < TRIES:
        with tf.Session() as sess:
            if tot is not None:
                # Epochs and learning rate shrink by 10x per attempt.
                LR = lr.LinearRegression(training_epochs=int(n_epochs / 10**count),
                                         learning_rate=l_rate / 10**count)
                LR.read_weights(weights=W, bias=b)
            else:
                LR = lr.LinearRegression(training_epochs=n_epochs,
                                         learning_rate=l_rate)
                LR.read_weights()
            LR.linear_regression_model()
            temp_diff, temp_W, temp_b = LR.gradient_descent(
                sess=sess, train_X=train_X, train_Y=train_Y)

        # Keep the attempt only when it improves on the best so far.
        if temp_diff < max_diff:
            W, b, max_diff = temp_W, temp_b, temp_diff
        count += 1

    # NOTE(review): W and b are unbound here when there were no starting
    # factors and no attempt beat max_diff (or TRIES is 0) -- confirm.
    node.f_linear = np.concatenate((np.transpose(W), np.transpose(b)), axis=1)
    print("finish linear, node: " + str(node.idx))
def main():
    """
    Fit a linear regression on the Boston features plus all pairwise products.

    For every feature pair (i, j) with i <= j the element-wise product is
    appended as an extra column to both the training and the test matrix.
    The test and training MSE of the fitted model are printed as a table.

    The ``print`` calls use the function form so the script runs on Python 3;
    single-argument calls behave the same on Python 2.

    :return: None (results are printed)
    """
    data_set = load_boston()
    train_data, train_target, test_data, test_target = LR.split_data(data_set)

    num_features = train_data.shape[1]
    new_train_data = train_data.copy()
    new_test_data = test_data.copy()

    for i in range(num_features):
        for j in range(i, num_features):
            # Product of features i and j, reshaped to a column and appended.
            train_column = (train_data[:, i] * train_data[:, j]).reshape(-1, 1)
            new_train_data = np.append(new_train_data, train_column, 1)

            test_column = (test_data[:, i] * test_data[:, j]).reshape(-1, 1)
            new_test_data = np.append(new_test_data, test_column, 1)

    lr = LR.LinearRegression()
    lr.fit(new_train_data, train_target)
    mse_test = lr.mse(new_test_data, test_target)
    mse_train = lr.mse(new_train_data, train_target)

    print("\nSol. 3.4")
    print("Linear Regression")
    print("{:^15}|{:^15}".format("Input Data", "MSE"))
    print("-" * 30)
    print("{:^15}|{:^15.7}".format("test_data", mse_test))
    print("{:^15}|{:^15.7}".format("train_data", mse_train))
    print("\n")
def main():
    """
    Brute-force search for the four features with the lowest test MSE.

    Every 4-feature combination is fitted on the training split and scored
    on the test split; the best combination is printed with its feature
    names, 1-based column indices and MSE.

    Changes from the original:
    * ``itertools.combinations`` enumerates each feature set exactly once.
      The original walked all permutations (24 per set) and de-duplicated
      them with a linear scan of a list.
    * ``float("inf")`` replaces ``sys.maxint``, which Python 3 removed.
    * ``print`` is called as a function.
    * The winning combination is reported in ascending column order.

    :return: None (results are printed)
    :raises ValueError: if no combination could be evaluated
    """
    from itertools import combinations

    data_set = load_boston()
    train_data, train_target, test_data, test_target = LR.split_data(data_set)

    min_MSE = float("inf")
    min_combo = None

    for combo in combinations(range(train_data.shape[1]), 4):
        columns = list(combo)
        lr = LR.LinearRegression()
        lr.fit(train_data[:, columns], train_target)
        MSE = lr.mse(test_data[:, columns], test_target)
        if min_MSE > MSE:
            min_MSE = MSE
            min_combo = combo

    if min_combo is None:
        # Fewer than four features, or no finite MSE was produced.
        raise ValueError("no 4-feature combination could be evaluated")

    print("Brute Force")
    print("Best Combination : [{}], by 1-index: {} with MSE = {:.7}".format(
        ", ".join([data_set.feature_names[x] for x in min_combo]),
        [x + 1 for x in min_combo],
        min_MSE))
def testSpeed():
    """
    Fit a regression spline to a hard-coded series and plot a zoomed window.

    The top panel shows the whole series, its fitted curve, and the samples
    in ``[startIndex, endIndex)`` highlighted. The bottom panel shows only
    that window with its fitted curve.

    NOTE(review): ``data`` is an empty list in this file, so the series has
    to be pasted in before the function is useful -- confirm.

    :return: None
    """
    startIndex, endIndex = 93, 118
    data = []
    y = np.mat(data).T

    h, timespan, M = 1, 6, 2
    X = np.mat(np.arange(y.shape[0])).T
    f = LinearRegression.RegressionSplineFunction(int(h * 60 / timespan) + M - 2, M)
    m = LinearRegression.LinearRegression().fit(X, y, [f])
    yHeat = m.predictValue(X)

    # Flattened views of the full series and of the inspected window.
    window = slice(startIndex, endIndex)
    xAll, yAll, fitAll = X.A.flatten(), y.A.flatten(), yHeat.A.flatten()
    xWin = X[window, :].A.flatten()
    yWin = y[window, :].A.flatten()
    fitWin = yHeat[window, :].A.flatten()

    plt.figure(1, (12, 8))
    plt.get_current_fig_manager().window.maximize()

    plt.subplot(211)
    plt.plot(xAll, yAll, "-xb")
    plt.plot(xAll, fitAll, "-r")
    plt.plot(xWin, yWin, "or")

    plt.subplot(212)
    plt.plot(xWin, yWin, "-xb")
    plt.plot(xWin, fitWin, "-r")

    plt.show(block=True)
    plt.close()
def data_handler():
    """
    Split the input CSV into train/test files and run every classifier.

    The first row of the CSV is treated as the label row. For each label in
    ``classes`` the matching column indices are found; the first
    ``train_num`` of them go to the training set and the rest to the test
    set. Both sets are written to disk, then the KNN, centroid, linear
    regression and SVM programs are run on those files.

    Reads the module-level names ``data_partitions``, ``filename``,
    ``classes``, ``train_filename`` and ``test_filename``.

    :return: None
    """
    train_num, test_num = data_partitions[0], data_partitions[1]
    print(train_num, test_num)

    data = pd.read_csv(filename, delimiter=',', dtype=None, header=None)
    # A numpy array is needed for column selection.
    numpy_data = np.array(data)
    labels = np.array(data.head(1))

    # One int64 index array per requested class.
    index_list = [np.where(labels == data_class)[1] for data_class in classes]

    # NOTE(review): test_num is only printed; the test set takes every
    # remaining column of a class -- confirm that is intended.
    train_list = [column for one_class in index_list
                  for column in one_class[0:train_num]]
    test_list = [column for one_class in index_list
                 for column in one_class[train_num:]]
    print(train_list)
    print(test_list)

    train = np.array(numpy_data[:, train_list])
    test = np.array(numpy_data[:, test_list])
    print(train.shape, test.shape)

    np.savetxt(train_filename, train, delimiter=',', fmt='%i')
    np.savetxt(test_filename, test, delimiter=',', fmt='%i')

    # Run the programs on the files written above.
    print("KNN classifier")
    knn = KnnClassification.KnnClassification(10, train_filename, test_filename)
    knn.train()

    print("Centroid classifier")
    centroid = CentroidMethod.CentroidMethod(train_filename, test_filename)
    centroid.pre_process()
    centroid.train()

    print("Linear regression")
    regression = LinearRegression.LinearRegression(train_filename, test_filename)
    regression.compute_coefficients()

    print("SVM classifier")
    svm = Svm.Svm(train_filename, test_filename)
    svm.train()
def isConstant(y, periods, alpha):
    """
    Decide whether the series ``y`` can be treated as constant.

    A zero-variance series is constant outright. Otherwise three checks are
    run in order, and the first p-value at or below ``alpha`` rejects
    constancy:

    1. a white-noise test of the centred series for every entry of ``periods``,
    2. the p-value of the slope of a linear fit against the sample index,
    3. a runs test on the indicator "above the median".

    When all checks pass, the p-values are printed and True is returned.

    :param y: column of samples (indexed as a matrix with ``shape[0]`` rows)
    :param periods: values passed one by one to the white-noise test
    :param alpha: significance level compared against every p-value
    :return: True when the series is considered constant, else False
    """
    if y.var() == 0:
        return True

    centred = y - y.mean()
    p1 = [DataHelper.testWhiteNoise(centred, m) for m in periods]
    if np.any(np.mat(p1) <= alpha):
        return False

    index = np.mat(range(0, y.shape[0])).T
    p2 = LinearRegression.LinearRegression().fit(index, y).betaP
    # Row 1 of betaP is read as the slope's p-value.
    if p2[1, 0] <= alpha:
        return False

    # "- 0" turns the boolean mask into integers.
    aboveMedian = (y > np.quantile(y, 0.5)) - 0
    p3 = DataHelper.testRunsLeft(aboveMedian)
    if p3 <= alpha:
        return False

    print("{0}, {1}, {2}".format(p1, p2.T, p3))
    return True
def main():
    """
    Train the regression model on ``mdataset.csv`` and plot the result.

    Columns 0 and 1 of the dataset are scattered against column 2. The
    model's prediction is drawn over an evenly spaced range that spans both
    input columns, with the same value fed to both inputs.

    :return: None
    """
    # Load the dataset.
    data = np.genfromtxt("../datasets/mdataset.csv", delimiter=",")
    first, second, target = data[:, 0], data[:, 1], data[:, 2]

    # Create and describe the model.
    linreg = LinearRegression(data, 2, 'test')
    linreg.describeModel()

    # Train the model.
    epochs = 60
    linreg.training(epochs, 0.001)

    # Plot limits: x spans both input columns, y spans the target column.
    plt.figure(1)
    axis1 = [min(first), max(first), min(target), max(target)]
    axis2 = [min(second), max(second), min(target), max(target)]
    axis = [
        min([axis1[0], axis2[0]]),
        max([axis1[1], axis2[1]]),
        min([axis1[2], axis2[2]]),
        max([axis1[3], axis2[3]]),
    ]

    # Design matrix: a column of ones followed by the sample range twice.
    setx = np.asmatrix(np.linspace(axis[0], axis[1])).T
    x0 = np.ones((setx.size, 1))
    x = np.concatenate((x0, setx, setx), axis=1)

    plt.scatter(first, target)
    plt.scatter(second, target)
    plt.plot(setx, linreg.modelFunction(x))
    plt.axis(axis)
    plt.show()
def testAmplitude():
    """
    Fit a regression spline to a hard-coded series and print the residuals.

    The residuals (observed minus fitted) of the samples in
    ``[startIndex, endIndex)`` are printed. The full series and that window
    are then plotted with their fitted curves.

    NOTE(review): ``data`` is an empty list in this file, so the series has
    to be pasted in before the function is useful -- confirm.

    :return: None
    """
    startIndex, endIndex = 2855, 2880
    data = []
    y = np.mat(data).T

    h, M = 24, 3
    X = np.mat(np.arange(y.shape[0])).T
    spline = LinearRegression.RegressionSplineFunction(int(h * 60 / 60) + M - 2, M)
    m = LinearRegression.LinearRegression().fit(X, y, [spline])
    yHeat = m.predictValue(X)

    window = slice(startIndex, endIndex)
    amplitude = y[window, :] - yHeat[window, :]
    print(amplitude.A.flatten().tolist())

    xAll = X.A.flatten()
    xWin = X[window, :].A.flatten()

    plt.figure(1, (12, 8))
    plt.get_current_fig_manager().window.maximize()

    plt.subplot(211)
    plt.plot(xAll, y.A.flatten(), "-xb")
    plt.plot(xAll, yHeat.A.flatten(), "-r")

    plt.subplot(212)
    plt.plot(xWin, y[window, :].A.flatten(), "-xb")
    plt.plot(xWin, yHeat[window, :].A.flatten(), "-r")

    plt.show(block=True)
    plt.close()
def detectSpeed(j, f):
    """
    Compute per-sample "speed" for column ``j`` and derive outlier indices.

    Steps:

    1. Load (or compute and cache) the speeds from per-window spline fits.
    2. Print the number of speeds more than six standard deviations from
       the mean and show their histogram.
    3. Do the same for the first differences of the raw column.
    4. Load (or build and cache) an isolation forest over the speeds.
    5. Derive three index lists: 6-sigma speeds, forest anomalies, and
       6-sigma deltas.

    The index lists are computed but not displayed here. Pass them to
    ``showAnomaly(indices, j, size, data, marks)`` to inspect them.

    :param j: column index of the series inside the loaded data matrix
    :param f: value used in the data/cache paths and as ``int(3600 / f)``
              (presumably the sampling period in seconds -- TODO confirm)
    :return: None
    """
    print("speed {0} started".format(j))

    M = 2
    h = 1
    timespan = 6
    size, speed = int(3600 / f), []

    data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/data.npy".format(f)))
    marks = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/marks.npy".format(f)))

    def showHistogram(samples, center, spread):
        # Histogram with vertical lines at the mean and at +/- 6 sigma.
        plt.figure(1, (12, 8))
        plt.get_current_fig_manager().window.showMaximized()
        plt.hist(samples, bins = 1000)
        for x in [center, center - 6 * spread, center + 6 * spread]:
            plt.axvline(x, color = "b")
        plt.show(block = True)
        plt.close()

    # --- speeds from per-window spline fits, cached on disk -----------------
    speedFile = f"{f}/speed_{j}_speed.npy"
    if os.path.isfile(speedFile):
        speed = np.load(speedFile)
    else:
        windowLength = size * h
        totalCount = math.floor(data.shape[0] / windowLength)
        for i in range(0, totalCount):
            y2 = data[i * windowLength:(i + 1) * windowLength, j]
            X2 = np.mat(np.arange(y2.shape[0])).T
            knots = findKnots3(y2.A.flatten())
            f2 = LinearRegression.RegressionSplineFunction(int(h * 60 / timespan) + M - 2, M, knots)
            m2 = LinearRegression.LinearRegression().fit(X2, y2, [f2])
            sY2 = m2.predictValue(X2)
            speed.extend(getSpeedM2(m2.beta, f2.knots, X2).A.flatten().tolist())

        print("speed history completed.")
        speed = np.array(speed)
        np.save(speedFile, speed)

    speedMean, speedStd = speed.mean(), speed.std()
    speedZ = (speed - speedMean) / speedStd
    print(np.logical_or(speedZ < -6, speedZ > 6).sum())
    showHistogram(speed, speedMean, speedStd)

    # --- first differences of the raw column --------------------------------
    deltaValues = np.diff(data[:, j], 1, 0)
    deltaMean, deltaStd = deltaValues.mean(), deltaValues.std()
    deltaZ = (deltaValues - deltaMean) / deltaStd
    print(np.logical_or(deltaZ < -6, deltaZ > 6).sum())
    showHistogram(deltaValues.A.flatten(), deltaMean, deltaStd)

    # Indices of speeds outside mean +/- 6 sigma.
    indices1 = np.argwhere(speed < (speedMean - 6 * speedStd))[:, 0].flatten().tolist() \
        + np.argwhere(speed > (speedMean + 6 * speedStd))[:, 0].flatten().tolist()
    indices1.sort()

    # --- isolation forest over the speeds, cached on disk -------------------
    forest = None
    forestFile = "{0}/speed_{1}_forest.npy".format(f, j)
    if os.path.isfile(forestFile):
        # NOTE(review): pickle.load runs arbitrary code from the file; only
        # load caches this program wrote itself.
        with open(forestFile, "rb") as file:
            forest = pickle.load(file)
    else:
        dataSet = np.mat(speed).T
        forest = IsolationForest(200, 2 ** 9, CurvesThresholdFinder(0.65, 0.68, 0.73, False))
        forest.fill(dataSet)
        print("forest fill completed")
        forest.train(dataSet)
        print("forest train completed")

        with open(forestFile, "wb") as file:
            pickle.dump(forest, file, protocol = pickle.DEFAULT_PROTOCOL)

    # Indices whose forest score reaches the forest's threshold.
    scores = np.array(forest.scores)
    indices3 = np.argwhere(scores >= forest.threshold)[:, 0].flatten().tolist()
    indices3.sort()

    # Indices of deltas outside mean +/- 6 sigma, shifted by one because
    # delta k is the step from sample k to sample k + 1.
    indices5 = np.argwhere(deltaValues < (deltaMean - 6 * deltaStd))[:, 0].flatten().tolist() \
        + np.argwhere(deltaValues > (deltaMean + 6 * deltaStd))[:, 0].flatten().tolist()
    indices5 = [i + 1 for i in indices5]
    indices5.sort()

    print("speed {0} completed".format(j))
def detectAmplitude(j, f):
    """
    Compute spline residuals ("amplitude") for column ``j`` and show outliers.

    Steps:

    1. Load (or compute and cache) the historical amplitude: the raw value
       minus the spline fitted over each ``h``-hour window.
    2. Print the normality test result and show the samples more than six
       standard deviations from the mean.
    3. Load (or compute in parallel and cache) the "realtime" amplitude from
       ``calcAmplitude`` for every index from ``size * h`` onwards, and show
       the samples outside the same 6-sigma band.

    Changes from the original:

    * The worker count is clamped to at least 1. The original passed
      ``psutil.cpu_count(False) - 2`` straight to ``Pool``, which raises on
      machines with two or fewer physical cores, and fails with a TypeError
      when the count is reported as ``None``.
    * ``values`` is always an ndarray. It used to be a plain list on a cache
      miss and an ndarray on a cache hit.

    :param j: column index of the series inside the loaded data matrix
    :param f: value used in the data/cache paths and as ``int(3600 / f)``
              (presumably the sampling period in seconds -- TODO confirm)
    :return: None
    """
    print("amplitude {0} started".format(j))

    M = 3
    h = 24
    size, sY = int(3600 / f), []

    data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/data.npy".format(f)))
    marks = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/marks.npy".format(f)))

    def sixSigmaIndices(samples, shift):
        # Sorted indices of samples outside amplitudeMean +/- 6 sigma.
        lower = amplitudeMean - 6 * amplitudeStd
        upper = amplitudeMean + 6 * amplitudeStd
        found = (np.argwhere(samples < lower)[:, 0] + shift).tolist() \
            + (np.argwhere(samples > upper)[:, 0] + shift).tolist()
        found.sort()
        return found

    # --- historical amplitude, cached on disk -------------------------------
    amplitudeFile = f"{f}/amplitude_{j}_amplitude.npy"
    if os.path.isfile(amplitudeFile):
        amplitude = np.load(amplitudeFile)
    else:
        windowLength = size * h
        totalCount = math.floor(data.shape[0] / windowLength)
        for i in range(0, totalCount):
            y2 = data[i * windowLength:(i + 1) * windowLength, j]
            X2 = np.mat(np.arange(y2.shape[0])).T
            m2 = LinearRegression.LinearRegression().fit(
                X2, y2, [LinearRegression.RegressionSplineFunction(h + M - 2, M)])
            sY2 = m2.predictValue(X2)
            sY.extend(sY2.A.flatten().tolist())

        print("amplitude history completed.")
        # Only complete windows were fitted, so trim the raw data to match.
        amplitude = data[: len(sY), j].A.flatten() - np.array(sY)
        np.save(amplitudeFile, amplitude)

    amplitudeMean, amplitudeStd = amplitude.mean(), amplitude.std()
    print(DataHelper.testNormalDistribution(amplitude))

    indices1 = sixSigmaIndices(amplitude, 0)
    showAnomaly(indices1, j, size, data, marks)

    # --- realtime amplitude, cached on disk ---------------------------------
    h, m = 24, 12  # 24 hours, 12 minutes
    startIndex, offset = size * h, int(m * 60 / f)

    valuesFile = f"{f}/amplitude_{j}_values.npy"
    if os.path.isfile(valuesFile):
        values = np.load(valuesFile)
    else:
        # Leave two physical cores free, but never ask for fewer than one worker.
        workers = max(1, (psutil.cpu_count(False) or 1) - 2)
        tasks = [(i, j, offset, size, h, M, data)
                 for i in range(startIndex, data.shape[0] - offset)]
        with multiprocessing.Pool(workers) as pool:
            values = np.array(pool.starmap(calcAmplitude, tasks))
        np.save(valuesFile, values)

    # Realtime values start at startIndex, so shift the indices back.
    indices2 = sixSigmaIndices(values, startIndex)
    showAnomaly(indices2, j, size, data, marks)

    print("amplitude {0} completed".format(j))
def calcSpeedM3(i, j, offset, size, h, data, X, x, T):
    """
    Fit one window of column ``j`` and return its speed value.

    The window holds ``size * h`` samples and ends ``offset`` samples after
    index ``i`` (inclusive). The fitted coefficients are passed on to
    ``getSpeedM3Internal`` together with ``x`` and ``T``.

    :param i: index of the sample being evaluated
    :param j: column index of the series
    :param offset: number of samples after ``i`` included in the window
    :param size: multiplied by ``h`` to get the window length
    :param h: multiplied by ``size`` to get the window length
    :param data: matrix holding the series in its columns
    :param X: design matrix passed to the fit
    :param x: passed through to ``getSpeedM3Internal``
    :param T: passed through to ``getSpeedM3Internal``
    :return: element ``[0, 0]`` of the ``getSpeedM3Internal`` result
    """
    windowEnd = i + 1 + offset
    windowStart = windowEnd - size * h
    y = data[windowStart:windowEnd, j]

    model = LinearRegression.LinearRegression().fit(X, y)
    return getSpeedM3Internal(model.beta, x, T)[0, 0]
import LinearRegression
from preprosessing import *
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression as LR
import numpy as np

# Compare the project's gradient-descent regression with scikit-learn's
# LinearRegression on the Boston housing data.
if __name__ == "__main__":
    data = load_boston()

    # Normalize the feature matrix before splitting.
    X = mean_norm(data['data'])
    y = data['target']
    X_train, y_train, X_test, y_test = split_data(X, y)

    # Project implementation: gradient descent returns the weights and the cost.
    model = LinearRegression.LinearRegression()
    theta, cost = model.gradient_descent(X_train, y_train)
    own_predictions = mean_norm(X_test).dot(theta)
    print(own_predictions)

    # scikit-learn reference model.
    model1 = LR()
    model1.fit(X_train, y_train)
    reference_predictions = model1.predict(mean_norm(X_test))
    print(reference_predictions)
def calcAmplitude(i, j, offset, size, h, M, data):
    """
    Return the residual of sample ``i`` against a spline fitted around it.

    A spline is fitted over a window of ``size * h`` samples of column ``j``
    that ends ``offset`` samples after ``i`` (inclusive). The result is the
    raw value at ``i`` minus the fitted value at window position
    ``size * h - 1 - offset``, which is where sample ``i`` sits in the window.

    :param i: index of the sample being evaluated
    :param j: column index of the series
    :param offset: number of samples after ``i`` included in the window
    :param size: multiplied by ``h`` to get the window length
    :param h: multiplied by ``size`` for the window length and used for the
              spline's first argument
    :param M: second argument of the spline function
    :param data: matrix holding the series in its columns
    :return: observed value minus fitted value at sample ``i``
    """
    windowLength = size * h
    X = np.mat(np.arange(windowLength)).T

    windowEnd = i + 1 + offset
    y = data[windowEnd - windowLength:windowEnd, j]

    observed = data[i, j]
    spline = LinearRegression.RegressionSplineFunction(h + M - 2, M)
    model = LinearRegression.LinearRegression().fit(X, y, [spline])
    fitted = model.predictValue(np.mat([windowLength - 1 - offset]))[0, 0]
    return observed - fitted
from pyspark.ml.linalg import Vectors
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression

# Template: train and evaluate a Spark ML linear regression.
#
# Fixes relative to the original snippet:
#   * typographic quotes replaced by ASCII quotes (they were a syntax error)
#   * LinearRegression is now imported
#   * the model is fitted through `model`; `lr` was never defined
#   * `describe` is called before `.show()`
#   * `test_resulsts` typo corrected
#   * labelCol now names the column that is actually selected into final_data
#   * metrics are printed instead of being bare expressions
#
# NOTE(review): inputCol1..inputCol4, `Indexed` and 'dependentVariableName'
# are placeholders that must be defined or renamed for the real dataset.

assembler = VectorAssembler(
    inputCols=[inputCol1, inputCol2, inputCol3, inputCol4],
    outputCol='features')

# The output keeps every column of the data set and adds a 'features'
# column: a vector built from the input columns listed above.
output = assembler.transform(Indexed)
final_data = output.select('features', 'dependentVariableName')

# Split the data into training and test sets.
train_data, test_data = final_data.randomSplit([0.7, 0.3])
train_data.show()
test_data.describe().show()

# Build the linear regression model with 'features' as input.
model = LinearRegression(
    featuresCol='features',
    labelCol='dependentVariableName',
    predictionCol='prediction')
lrModel = model.fit(train_data)

# Evaluate how the model performed on the test data.
test_results = lrModel.evaluate(test_data)
test_results.residuals.show()
print('RMSE:', test_results.rootMeanSquaredError)

# Model performance parameter: R-squared.
print('R2:', test_results.r2)

# Check what the predictions are on data that has no label value.
unlabeled_data = test_data.select('features')
predictions = lrModel.transform(unlabeled_data)
predictions.show()
data_frame, 39, 9) centroid_data_frame_train = deepcopy(train_data_with_labels) centroid_data_frame_test = deepcopy(test_data_with_labels) # make_file_and_save_data_train = Task_E.store(train_data_set_without_labels.T, train_y, 'jenil_train.csv') # make_file_and_save_data_test = Task_E.store(test_data_set_without_labels.T, test_y, 'jenil_test.csv') k = 5 knn_object = Knn(k) data_with_euclidean_distance = knn_object.calculate_distance( train_data_with_labels.values, test_data_with_labels.values) accuracy = knn_object.get_accuracy([ (k['Test Label'], k['Classification']) for k in data_with_euclidean_distance ]) print('Accuracy of Knn is:', accuracy) # Linear Regression linear_regression_object = LinearRegression.LinearRegression() N_train, L_train, Xtrain = len( train_y), train_y, train_data_set_without_labels.T N_test, Ytest, Xtest = len( test_y), test_y, test_data_set_without_labels.T Ytrain = linear_regression_object.indicator_matrix(L_train) linear_regression_object.accuracy(N_train, N_test, Xtrain, Xtest, Ytrain, Ytest) # SVM svm_object = Svm.SupportVectorMachine() svm_object.find_accuracy(train_data_set_without_labels, train_y, test_data_set_without_labels, test_y)
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression

# Compare a thresholded linear regression with a logistic regression on the
# single feature 'area_mean'.
#
# Fixes relative to the original snippet:
#   * `plt.show(` was missing its closing parenthesis (syntax error)
#   * `df.Prediction` was read but never created in this snippet; it is now
#     filled from the linear model when the column is not already present

x = 'area_mean'
sns.lmplot(x=x, y='Target', data=df, ci=None)
plt.ylim([-0.5, 1.5])
plt.xlim([df[x].min() - df[x].std(), df[x].max() + df[x].std()])
plt.show()

linreg = LinearRegression().fit(df.area_mean.values.reshape(-1, 1), df.Target)

# Compute the prediction for area_mean=350 (and 5) using the predict method.
linreg.predict(np.array([[5], [350]]))

# Linear-model output for every row, unless an earlier step already stored it.
if 'Prediction' not in df:
    df['Prediction'] = linreg.predict(df.area_mean.values.reshape(-1, 1))

# NOTE(review): a cut-off of 0.05 looks unusually low for a 0/1 target;
# 0.5 may have been intended -- confirm before changing.
df['Pred_class'] = df.Prediction.map(lambda pred: 1 if pred > 0.05 else 0)

# Fit a logistic regression model and store the class predictions.
logreg = LogisticRegression(C=1e9, solver='lbfgs')
feature_cols = ['area_mean']
X = df[feature_cols]
y = df.Target
logreg.fit(X, y)
df['Log_Prediction'] = logreg.predict(X)
df['Log_probabilities'] = logreg.predict_proba(X)[:, 1]

# Number of rows where the two classifiers disagree.
(df.Pred_class != df.Log_Prediction).sum()
dataMat = mat(dataMat); labelMat = mat(labelMat) labelMat = labelMat.T sum_error_train = 0; sum_error = 0; sum_rr = 0 for j in range(10): z = range(27) random.shuffle(z) x = zeros([20, 5]); y = zeros([20, 1]); newx = zeros([7, 5]); newy = zeros([7, 1]) x = mat(x); y = mat(y); newx = mat(newx); newy = mat(newy) for i in range(20): x[i] = dataMat[z[i]] y[i] = labelMat[z[i]] for i in range(20, 27): newx[i-20] = dataMat[z[i]] newy[i-20] = labelMat[z[i]] # 调用线性回归方法,得到训练集和测试集误差和r方 error_train, error, rr = LinearRegression(x, y, newx, newy) sum_error_train += error_train; sum_error += error; sum_rr += rr error_train = sum_error_train / 10; error = sum_error / 10; rr = sum_rr / 10 print '训练集均方误差=', error_train print '验证集均方误差=', error print 'r方=', rr elif a == '2': #n次k折交叉验证 dataMat, labelMat = loadDataSet('10.txt') dataMat = mat(dataMat); labelMat = mat(labelMat) labelMat = labelMat.T sum_error_train = 0; sum_error = 0; sum_rr = 0 for j in range(10): z = range(27) random.shuffle(z)
rom sklearn.linear_model import LinearRegression reg = Ridge() reg = LinearRegression() # making an object reg.fit(nd1,nd2) reg.predict(nd1) #utils shuffle #preprocessing LabelEncoder, OrdinalEncoder , label_binarize PolynomialFeatures MinMaxScaler, StandardScaler, RobustScaler # scale #model_selection train_test_split , cross_val_score , cross_validate, learning_curve## only one split, cv split only scores, cv split scores and more info, cross_validate on different train_sizes GridSearchCV , RandomizedSearchCV , validation_curve # on params #metrics from sklearn.metrics import make_scorer from sklearn.metrics import mean_squared_error, f1_score, accuracy_score, precision_score, recall_score from sklearn.metrics import confusion_matrix, classification_report #cnfmatrix = confusion_matrix(Y,predicted_Y) from sklearn.metrics import precision_recall_curve , roc_curve , roc_auc_score ############################################################
import numpy as np
import pandas as pd
import LinearRegression as lr
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston

# Compare the project's LinearRegression with scikit-learn's on the Boston
# housing data and print both scores side by side.

dataset = load_boston()
X = dataset.data
y = dataset.target

print(f"This dataset contains {X.shape[0]} entries and {X.shape[1]} features")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# The project model takes its training data at construction time;
# scikit-learn takes it in fit().
my_regressor = lr.LinearRegression(X_train, y_train).fit()
sklearn_regressor = LinearRegression().fit(X_train, y_train)

# Training scores; the project model is scored without arguments here.
my_train_accuracy = my_regressor.score()
sklearn_train_accuracy = sklearn_regressor.score(X_train, y_train)

# Test scores.
my_test_accuracy = my_regressor.score(X_test, y_test)
sklearn_test_accuracy = sklearn_regressor.score(X_test, y_test)

# Rows are the data splits, columns are the implementations.
score_table = [
    [my_train_accuracy, sklearn_train_accuracy],
    [my_test_accuracy, sklearn_test_accuracy],
]
result = pd.DataFrame(
    data=score_table,
    index=['Training Acc.', 'Test Acc.'],
    columns=['Our\'s', 'Sklearn\'s'])

print(result)
# Template: fit a scikit-learn LinearRegression on two CSV columns and query it.
# Fixed: the import was written as `import ... import ...`, a syntax error.
from sklearn.linear_model import LinearRegression

# NOTE(review): `pd` (pandas) and `path` are not defined in this snippet; they
# must be provided by the surrounding file.
data = pd.read_csv(path)
x = data[["col1", "col2"]]  # input
y = data["col3"]  # target

# check shapes
print(x.shape, y.shape)

# if they are the same we can proceed
# NOTE(review): a, b, c and d are placeholders that are never assigned here;
# fill them in with the target shapes before running.
if x.shape != y.shape:
    x = x.values.reshape(a, b)
    y = y.values.reshape(c, d)

reg = LinearRegression()  # initialize the model
reg.fit(x, y)

# model performance parameter
R_squared = reg.score(x, y)
coefficents = reg.coef_
intercept = reg.intercept_

# prediction
# NOTE(review): scikit-learn documents predict() as taking a 2-D array-like of
# shape (n_samples, n_features); a bare scalar will presumably be rejected --
# confirm and wrap the value accordingly.
x_value = 155.00
reg.predict(x_value)
import LinearRegression as us

# NOTE(review): `plt` and `make_regression` are used below but not imported in
# this snippet; they must come from earlier in the file.
plt.title("Toy Dataset")
X1, Y1 = make_regression(n_samples=100000, n_features=100, n_informative=80,
                         n_targets=1, noise=0.5)

# The last HOLDOUT_SIZE samples are kept back for testing.
HOLDOUT_SIZE = 10000
X1_train = X1[:-HOLDOUT_SIZE]
Y1_train = Y1[:-HOLDOUT_SIZE]
X1_test = X1[-HOLDOUT_SIZE:]
Y1_test = Y1[-HOLDOUT_SIZE:]

our_rg = us.LinearRegression()
our_rg.fit(X1_train, Y1_train)
our_Y1_pred = our_rg.predict(X1_test)
our_Y1_fit = our_rg.predict(X1_train)


def count_predict_Loss(y, target):
    """Return the mean of the squared differences between y and target.

    Args:
        y: indexable sequence of predicted values.
        target: indexable sequence of reference values, read at the same
            positions as ``y``.

    Returns:
        The summed squared differences divided by ``len(y)``.

    Raises:
        ZeroDivisionError: if ``y`` is empty.
        IndexError: if ``target`` is shorter than ``y``.
    """
    # The accumulator is named `total` so the builtin sum() is not shadowed.
    total = 0
    for i, predicted in enumerate(y):
        total += (predicted - target[i])**2
    return total / len(y)


train_loss = count_predict_Loss(our_Y1_fit, Y1_train)
test_loss = count_predict_Loss(our_Y1_pred, Y1_test)
print("train_loss: ", train_loss)
import sys

import LinearRegression
from LinearRegression import LinearRegression


def main():
    """Run the project's LinearRegression pipeline on the auto-mpg CSV."""
    input_path = "Input/auto_mpg.csv"
    output_dir = "Output/"
    instance_count = 398
    attribute_count = 8

    runner = LinearRegression(input_path, output_dir,
                              instance_count, attribute_count)
    runner.process()


if __name__ == "__main__":
    main()
## Linear regression test code
import numpy as np
from sklearn import datasets

boston_X, boston_y = datasets.load_boston(return_X_y=True)
# Keep only feature column 5, shaped as a single-column 2-D array.
boston_X = boston_X[:, np.newaxis, 5]

# The last 20 samples are held out for evaluation.
X_train, X_test = boston_X[:-20], boston_X[-20:]
y_train, y_test = boston_y[:-20], boston_y[-20:]

from LinearRegression import *

lin = LinearRegression()
lin.buildModel(X_train, y_train)
lin.evaluateModel(X_test, y_test)
lin.predictValue(5)

###########################################
## KNN test code
from sklearn.datasets import load_iris
from sklearn.utils import shuffle

iris_X, iris_y = shuffle(*load_iris(return_X_y=True))

# The last 30 shuffled samples are held out for evaluation.
X_train, X_test = iris_X[:-30], iris_X[-30:]
y_train = iris_y[:-30]
def PlotData (lastPxx, UnLoadPxx1, UnLoadTime1, WriteFolderName, WriteFileNameEnd):
    """Fit log(UnLoadPxx1) against scaled time and plot the cost and the fit.

    The time values are multiplied by the module-level ``timeStep`` and 1e-6.
    A column of ones is prepended to form the design matrix, which is passed
    to ``LinReg.LinearRegression`` together with the module-level
    ``learnRate`` and ``thisLambda``. The cost curve (first two entries
    skipped) is shown, the returned ``Theta`` is printed, and a scatter of
    targets against fitted values is shown.

    Args:
        lastPxx: not used by the live code.
        UnLoadPxx1: sequence of values whose natural log is the fit target.
        UnLoadTime1: sequence of time values, one per entry of UnLoadPxx1.
        WriteFolderName: not used by the live code.
        WriteFileNameEnd: not used by the live code.

    Returns:
        None. The function only plots and prints.
    """
    # Scaled time as a single-column matrix.
    times = (timeStep*1e-6*np.array(UnLoadTime1)).reshape(-1, 1)
    log_pxx = np.log(np.array(UnLoadPxx1))

    # Design matrix: a column of ones followed by the scaled time column.
    bias_column = np.ones((times.shape[0], 1))
    design = np.hstack((bias_column, times))

    Iteration, Cost, Theta = LinReg.LinearRegression (design, log_pxx, learnRate, thisLambda)

    # Cost history, skipping the first two recorded entries.
    plt.plot(Iteration[2:], Cost[2:], label='cost')
    plt.legend()
    plt.show()
    plt.close()

    print(Theta)
    fitted = design.dot(Theta)

    # One figure per time column (there is exactly one after the reshape).
    for column in times.T:
        plt.scatter(column, log_pxx, color='b', label='target')
        plt.scatter(column, fitted, color='r', label='fit')
        plt.legend()
        plt.show()
        plt.close()


# NOTE(review): assumed to be module-level accumulators reset after the
# function definition -- confirm against the original indentation.
UnLoadTime1 = []
UnLoadStrain1 = []
UnLoadPxx1 = []
import LinearRegression
import numpy as np

# Ten samples of the line Y = 0 + 1X.
X = np.arange(1, 11)
Y = X.copy()

# Train the project's regressor and query it at a point outside the data.
model = LinearRegression.LinearRegression()
model.train(X, Y)
print(model.predict(14))
random_data = np.random.rand(len(y), total_features) # Update each feature with slope and randomness random_data[:, 0] = np.ones(len(y)) for i in range(0, total_features - 1): random_data[:, i + 1] += (y * slope[i]) + np.random.uniform( -randomness[i], randomness[i], (len(y))) return (random_data[:, 0:total_features], y) # Generate random data x, y = generate_data(3, [-0.5, 0.2], [0.3, 1], (0, 10, 0.1)) # First model use gradient descent linear_regression = lr.LinearRegression(3) linear_regression.batch_size = 25 linear_regression.total_epochs = 40 # Second model use normal equation linear_regression_1 = lr.LinearRegression(3) linear_regression_1.weight = np.copy(linear_regression.weight) print(f'---Gradient Descent---') print(f'Initial cost : {linear_regression.cost(x, y)}') # Start gradient Descent on first model start = timeit.default_timer() linear_regression.gradient_descent(x, y) linear_regression.gradient_descent(x, y) taken = (timeit.default_timer() - start)
testErr = [0.0] * nfold allIndex = range(0, m) for i in range(0, nfold): testIndex = range((foldSize * i), foldSize * (i + 1)) trainIndex = list(set(allIndex) - set(testIndex)) trainX = X[trainIndex, :] trainY = Y[trainIndex] testX = X[testIndex, :] testY = Y[testIndex] # set parameter alpha = 0.01 lam = 0.1 model = LR.LinearRegression(trainX, trainY, alpha, lam) model.run(400, printIter=False) trainPred = model.predict(trainX) trainErr[i] = sum((trainPred - trainY)**2) / len(trainY) testPred = model.predict(testX) testErr[i] = sum((testPred - testY)**2) / len(testY) print "train Err=", trainErr[i], "test Err=", testErr[i] print " " print "summary:" print "average train err=", numpy.mean(trainErr) print "average test err=", numpy.mean(testErr)
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 29 20:45:43 2018

@author: htshinichi
"""
from matplotlib import pyplot as plt
import LinearRegression
import pandas as pd
import numpy as np

# Load the samples; column `x1` is the input and `label` the response.
data = pd.read_csv("test_Regression.csv")
X, y = data.x1, data.label

# The project's regressor is fitted on the whole frame.
model_linr = LinearRegression.LinearRegression()
model_linr.fit(data)
print(model_linr.weights)
print(model_linr.bias)

# Evaluate the fitted model at unit steps from min(X) up to, but excluding,
# max(X), passed as a single-column matrix.
line_X = np.arange(X.min(), X.max()).reshape(-1, 1)
line_y = model_linr.predict(line_X)

plt.plot(line_X, line_y, color='navy', linewidth=2, label='Linear regressor')
plt.scatter(X, y, color='yellowgreen', marker='.', label='Inliers')
plt.legend(loc='lower right')
plt.xlabel("Input")
plt.ylabel("Response")
plt.show()