import numpy as np
# UnknownData, ProblemData, TestData, PredictData, fill, normalization, pca,
# kpp_centers, runKmeans, euler_distance and plotData are project helpers;
# their home modules are not shown in this file.


def mainpca():
    unknowndata = UnknownData()  # unlabeled data used to produce the cluster centers
    unknowndata = fill(unknowndata, 1778, 336, 254)
    unknowndata = normalization(unknowndata, 1778, 336)
    U1, S1, V1 = pca(np.array(unknowndata, dtype='float'), 2)
    init_centroids = np.array(kpp_centers(U1, 2))
    idx, centroids_all = runKmeans(U1, init_centroids, 100)
    centroids = centroids_all[-1]
    print("Cluster centers produced from the unlabeled perception data:\n",
          centroids[0], centroids[1])
    # plotData(U1, centroids_all, idx)
    # plt.savefig('F:\\导出的图片.png')
    # plt.show()

    problemdata = ProblemData()  # problem-cell data
    problemdata = fill(problemdata, 2093, 336, 299)
    problemdata = normalization(problemdata, 2093, 336)
    U2, S2, V2 = pca(np.array(problemdata, dtype='float'), 2)
    # plt.scatter(U2[:, 0], U2[:, 1])

    testdata = TestData()  # data for measuring prediction accuracy
    testdata = fill(testdata, 476, 336, 68)
    testdata = normalization(testdata, 476, 336)
    U3, S3, V3 = pca(np.array(testdata, dtype='float'), 2)
    # plt.scatter(U3[:, 0], U3[:, 1], c='orange')
    # plt.show()
    # plt.subplot2grid((2, 2), (0, 0))
    # plt.scatter(U2[:, 0], U2[:, 1])
    # plt.subplot2grid((2, 2), (0, 1))
    # plt.scatter(U3[:, 0], U3[:, 1], c='orange')
    # plt.savefig('F:\\导出的图片1.png')
    # plt.subplot2grid((2, 2), (1, 0))
    # plotData(U1, centroids_all, idx)
    # plt.savefig('F:\\导出的图片3.png')
    # plt.show()

    # a = np.random.randint(0, 84)
    data_7, day, ECI, time, name = PredictData()  # data for poor-perception detection
    data_7 = fill(data_7, 7, 336, 1)
    data_7 = normalization(data_7, 7, 336)
    U4, S4, V4 = pca(np.array(data_7, dtype='float'), 2)
    data_arg = U4[day]
    print("Dimension-reduced detection data:", data_arg[0], data_arg[1])
    print("ECI:", ECI)
    print("time:", time)
    print("name:", name)

    T = U2  # prediction
    P = U3
    num1 = num2 = 0
    num3 = num4 = 0
    string1 = 'The data for this day shows a poor-perception problem'
    string2 = 'Perception for this day is normal'
    # Decide which center represents the problem cells: the one the majority
    # of the known problem data lies closest to.
    for i in range(2093):
        if euler_distance(T[i], centroids[0]) <= euler_distance(T[i], centroids[1]):
            num1 += 1
        else:
            num2 += 1
    if num1 >= num2:
        # centroids[0] is the problem-cell center
        print("Poor-perception cluster center:", centroids[0][0], centroids[0][1])
        for i in range(476):
            if euler_distance(P[i], centroids[0]) <= euler_distance(P[i], centroids[1]):
                num3 += 1
        print('Prediction accuracy:', '%.2f' % (100 * num3 / 476), '%')
        dis1 = euler_distance(data_arg, centroids[0])
        dis2 = euler_distance(data_arg, centroids[1])
        if dis1 < dis2:
            string = string1
        else:
            string = string2
        print(string)
    else:
        # centroids[1] is the problem-cell center
        print("Poor-perception cluster center:", centroids[1][0], centroids[1][1])
        for i in range(476):
            if euler_distance(P[i], centroids[0]) >= euler_distance(P[i], centroids[1]):
                num4 += 1
        print('Prediction accuracy:', '%.2f' % (100 * num4 / 476), '%')
        dis1 = euler_distance(data_arg, centroids[0])
        dis2 = euler_distance(data_arg, centroids[1])
        if dis1 > dis2:
            string = string1
        else:
            string = string2
        print(string)
    return string, ECI, time, name
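# A minimal usage sketch (not part of the original file): mainpca() returns
# the verdict string plus the identifiers of the inspected cell, so a caller
# can log or display the result directly.
if __name__ == '__main__':
    string, ECI, time, name = mainpca()
    print(string, ECI, time, name)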
# plt.show()
# plt.subplot2grid((2, 2), (0, 0))
# plt.scatter(U2[:, 0], U2[:, 1], c='red')
# plt.subplot2grid((2, 2), (0, 1))
# plt.scatter(U3[:, 0], U3[:, 1], c='blue')
# plt.savefig('F:\\导出的图片1.png')
# plt.subplot2grid((2, 2), (1, 0))
# plt.scatter(U1[:, 0], U1[:, 1], c='gold')
# plotData(U1, centroids_all, idx)
# plt.savefig('F:\\导出的图片3.png')
# plt.show()

tf.reset_default_graph()
a = np.random.randint(0, 12)
predictdata = PredictData()  # data for poor-perception detection
predictdata = fill(predictdata, 84, 336, 12)
predictdata = normalization(predictdata, 84, 336)
U4 = encode(predictdata)  # autoencoder-based dimensionality reduction
data_arg = U4[a * 7:(a + 1) * 7]
for i in range(7):
    print("Dimension-reduced data for day {}:".format(i), data_arg[i])
# plt.show()

T = U2
P = U3
num1 = num2 = 0
num3 = num4 = 0
for i in range(2093):
    if euler_distance(T[i], centroids[0]) <= euler_distance(T[i], centroids[1]):
        num1 += 1
    else:
        num2 += 1
from predict_data import PredictData
from fill_normalization import fill, normalization
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm

data = PredictData()  # data used to produce the cluster centers
data = fill(data, 84, 336, 12)
data = data.reshape(2016, 14)
c = data[:, 1]
np.random.seed(5)
a = np.random.randint(0, 12)
c = c[168 * a:168 * (a + 1)]
time = pd.Series(np.array(c, dtype=float),
                 index=pd.date_range(start='2016-10-08', periods=168, freq='H'))
print(time)
time.plot()
# plt.title("column 1 data and diff data")
# plt.show()

# ADF unit-root test to decide whether the series is stationary
# t = sm.tsa.stattools.adfuller(time)
# output = pd.DataFrame(index=['Test Statistic Value', "p-value", "Lags Used",
#                              "Number of Observations Used", "Critical Value(1%)",
#                              "Critical Value(5%)", "Critical Value(10%)"],
#                       columns=['value'])
# output['value']['Test Statistic Value'] = t[0]
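# A runnable version of the ADF check the commented block above sets up;
# sm.tsa.stattools.adfuller returns (statistic, p-value, lags used,
# observations used, critical values, ...). A p-value above 0.05 suggests the
# series is non-stationary and should be differenced before fitting ARIMA.
t = sm.tsa.stattools.adfuller(time)
output = pd.DataFrame(index=['Test Statistic Value', 'p-value', 'Lags Used',
                             'Number of Observations Used'],
                      columns=['value'])
output.loc['Test Statistic Value', 'value'] = t[0]
output.loc['p-value', 'value'] = t[1]
output.loc['Lags Used', 'value'] = t[2]
output.loc['Number of Observations Used', 'value'] = t[3]
for key, value in t[4].items():
    output.loc['Critical Value({})'.format(key), 'value'] = value
print(output)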
ipi = ImmediateData(fm)
target_folders = [i for i in files if "_growth" in i]
for t in target_folders:
    tar_file_path = file_path + t + "/"
    # print('tar_file_path:', tar_file_path)
    subfiles = os.listdir(tar_file_path)
    for s in subfiles:
        if ".xlsx" in s and "~$" not in s:
            # print(' ', s)
            fm.initialized_data(t + "/" + s, s.split('.')[0], "p")
            fm.store_yoy_growth_dataInfo(t + "/" + s, s.split('.')[0])
ipi.run_ipi("baseCompInfo")

# Prediction data
print("Predict Data")
pdd = PredictData(fm)
tar_file_path = file_path + 'yuce/std/'
subfiles = os.listdir(tar_file_path)
predict_categories = [
    "EPS", "profitGrowth", "netProfit", "netProfitComp", "ROE", "BPS", "close"
]
for i in predict_categories:
    for j in subfiles:
        if j.split("_")[0] == i or j.split(".")[0] == i:
            fm.initialized_data('yuce/std/' + j, j.split(".xlsx")[0], "p", True)
            fm.store_predict_dataInfo(j.split(".xlsx")[0], i)
pdd.run_predict()
fm.initialized_data("Industry/ROE.xlsx",
import numpy as np
from sklearn.preprocessing import MinMaxScaler
# PredictData and fill come from this project's helper modules (module names
# assumed from the sibling scripts).
from predict_data import PredictData
from fill_normalization import fill


def split_sequences(sequences, n_steps):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the dataset
        if end_ix > len(sequences) - 1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :], sequences[end_ix, :]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)


data = PredictData()
data = fill(data, 84, 336, 12)
np.random.seed(5)
a = np.random.randint(0, 12)
data = data[7 * a:7 * (a + 1)]
data.resize(168, 14)
scale = MinMaxScaler(feature_range=(0, 1))
data = scale.fit_transform(data)
# choose a number of time steps
n_steps = 10
# convert into input/output samples
X, y = split_sequences(data, n_steps)
n_features = X.shape[2]
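# A quick shape check (illustrative only, not in the original script): with
# 168 rows of 14 features and n_steps = 10, split_sequences yields
# 168 - 10 = 158 samples.
print(X.shape)  # (158, 10, 14): one (n_steps, n_features) window per sample
print(y.shape)  # (158, 14): the row immediately after each window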
import numpy as np
import pandas as pd
# PredictData and fill come from this project's helper modules (module names
# assumed from the sibling scripts).
from predict_data import PredictData
from fill_normalization import fill


def exponential_smoothing(alpha, s):
    '''
    Single exponential smoothing.
    :param alpha: smoothing coefficient
    :param s: data series, list
    :return: the single-exponential-smoothing series, list
    '''
    s_temp = []
    s_temp.append(s[0])
    for i in range(1, len(s), 1):
        s_temp.append(alpha * s[i - 1] + (1 - alpha) * s_temp[i - 1])
    return s_temp


data = PredictData()
data = fill(data, 84, 336, 12)
data = data.reshape(2016, 14)
c = data[:, 1]
np.random.seed(5)
a = np.random.randint(0, 12)
c = c[168 * a:168 * (a + 1)]
# print(c)
# print(type(c))  # numpy.ndarray
# print(c.shape)  # (168,)
time = pd.Series(np.array(c, dtype=float),
                 index=pd.date_range(start='2018-10-08', periods=168, freq='H'))
# print(time)
dict_time = {'ds': time.index, 'y': time.values}
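# Illustrative usage (alpha = 0.5 is an assumed value, not from the original
# script): smooth the selected hourly column once and inspect the first values.
smoothed = exponential_smoothing(0.5, list(time.values))
print(list(time.values)[:3])
print(smoothed[:3])  # index 0 equals s[0]; later points blend the previous observation in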