# Example #1
# 0
def mainpca():
    """PCA + K-means pipeline for perception-quality (感知) detection.

    Steps (all helpers — UnknownData, ProblemData, TestData, PredictData,
    fill, normalization, pca, kpp_centers, runKmeans, euler_distance — are
    defined elsewhere in the project):
      1. Reduce the "unknown" data to 2-D with PCA and cluster it into two
         groups with K-means (k-means++ initial centroids).
      2. Reduce the "problem" and "test" data sets the same way.
      3. Decide by majority vote over the problem data which centroid is the
         poor-perception cluster, report accuracy on the test data, and
         classify one selected day's data.

    Returns:
        (string, ECI, time, name): the verdict text for the selected day plus
        the identifiers returned by PredictData().
    """
    unknowndata = UnknownData()  # unknown data used to produce cluster centers
    # fill/normalization appear to take (data, rows, cols, ...) — 1778 rows x
    # 336 features; the 4th argument's meaning is not visible here — TODO
    # confirm against fill()'s definition.
    unknowndata = fill(unknowndata, 1778, 336, 254)
    unknowndata = normalization(unknowndata, 1778, 336)
    U1, S1, V1 = pca(np.array(unknowndata, dtype='float'), 2)  # project to 2-D
    init_centroids = np.array(kpp_centers(U1, 2))  # k-means++ seeding, k=2
    idx, centroids_all = runKmeans(U1, init_centroids, 100)
    centroids = centroids_all[-1]  # centroids from the final iteration
    print("感知不明数据产生的聚类中心点:\n", centroids[0], centroids[1])
    # plotData(U1, centroids_all, idx)
    # plt.savefig('F:\\导出的图片.png')
    # plt.show()

    problemdata = ProblemData()  # data known to contain perception problems
    problemdata = fill(problemdata, 2093, 336, 299)
    problemdata = normalization(problemdata, 2093, 336)
    U2, S2, V2 = pca(np.array(problemdata, dtype='float'), 2)
    # plt.scatter(U2[:, 0], U2[:, 1])

    testdata = TestData()  # data used to measure prediction accuracy
    testdata = fill(testdata, 476, 336, 68)
    testdata = normalization(testdata, 476, 336)
    U3, S3, V3 = pca(np.array(testdata, dtype='float'), 2)
    # plt.scatter(U3[:, 0], U3[:, 1],c='orange')
    # plt.show()

    # plt.subplot2grid((2,2),(0,0))
    # plt.scatter(U2[:, 0], U2[:, 1])
    # plt.subplot2grid((2,2),(0,1))
    # plt.scatter(U3[:, 0], U3[:, 1],c='orange')
    # # plt.savefig('F:\\导出的图片1.png')
    # plt.subplot2grid((2,2),(1,0))
    # plotData(U1, centroids_all, idx)
    # # plt.savefig('F:\\导出的图片3.png')
    # plt.show()

    # a = np.random.randint(0, 84)
    data_7, day, ECI, time, name = PredictData()  # data for poor-perception detection
    data_7 = fill(data_7, 7, 336, 1)
    data_7 = normalization(data_7, 7, 336)
    U4, S4, V4 = pca(np.array(data_7, dtype='float'), 2)
    data_arg = U4[day]  # 2-D point for the selected day
    print("降维后的感知差识别数据:", data_arg[0], data_arg[1])
    print("ECI:", ECI)
    print("time:", time)
    print("name:", name)

    T = U2  # prediction: vote with the 2-D problem-data points
    P = U3
    num1 = num2 = 0  # problem points nearer centroids[0] / centroids[1]
    num3 = num4 = 0  # correctly classified test points per branch
    string1 = '该日数据存在感知差问题'  # "this day's data has a poor-perception problem"
    string2 = '该日数据感知正常'  # "this day's data is perception-normal"
    # Majority vote: which centroid attracts most of the problem data?
    for i in range(2093):  # 2093 == number of problem-data rows (hard-coded above)
        if euler_distance(T[i], centroids[0]) <= euler_distance(
                T[i], centroids[1]):
            num1 += 1
        else:
            num2 += 1
    if num1 >= num2:
        print("感知差聚类中心点为:", centroids[0][0], centroids[0][1])
        # centroids[0] is the problem-cell centroid
        for i in range(476):  # 476 == number of test-data rows
            if euler_distance(P[i], centroids[0]) <= euler_distance(
                    P[i], centroids[1]):
                num3 += 1
        print('预测准确度为:', '%.2f' % (100 * num3 / 476), '%')
        # Classify the selected day by its nearer centroid.
        dis1 = euler_distance(data_arg, centroids[0])
        dis2 = euler_distance(data_arg, centroids[1])
        if dis1 < dis2:  # nearer the problem centroid
            string = string1
            print(string)
        else:
            string = string2
            print(string)
    else:
        print("感知正常中心点为:", centroids[1][0], centroids[1][1])
        # centroids[1] is the problem-cell centroid (per the original comment).
        # NOTE(review): the print above labels centroids[1] as the *normal*
        # centre, which contradicts this comment and the distance logic
        # below — verify which label is intended.
        for i in range(476):
            if euler_distance(P[i], centroids[0]) >= euler_distance(
                    P[i], centroids[1]):
                num4 += 1
        print('预测准确度为:', '%.2f' % (100 * num4 / 476), '%')
        # Classify the selected day by its nearer centroid.
        dis1 = euler_distance(data_arg, centroids[0])
        dis2 = euler_distance(data_arg, centroids[1])
        if dis1 > dis2:  # nearer centroids[1], the problem centroid here
            string = string1
            print(string)
        else:
            string = string2
            print(string)
    return string, ECI, time, name
# Example #2
# 0
# plt.show()

# plt.subplot2grid((2,2),(0,0))
# plt.scatter(U2[:, 0], U2[:, 1],c='red')
# plt.subplot2grid((2,2),(0,1))
# plt.scatter(U3[:, 0], U3[:, 1],c='blue')
# # plt.savefig('F:\\导出的图片1.png')
# plt.subplot2grid((2,2),(1,0))
# # plt.scatter(U1[:, 0], U1[:, 1],c='gold')
# plotData(U1, centroids_all, idx)
# # plt.savefig('F:\\导出的图片3.png')
# plt.show()

# NOTE(review): TF1-style API — tf.reset_default_graph() was removed in
# TensorFlow 2; this snippet presumably targets TF 1.x. Verify.
tf.reset_default_graph()
a = np.random.randint(0, 12)  # pick one of 12 weeks at random
predictdata = PredictData()                            # data for poor-perception detection
predictdata = fill(predictdata, 84, 336, 12)  # 84 rows x 336 features — TODO confirm fill()'s 4th arg
predictdata = normalization(predictdata, 84, 336)
U4 = encode(predictdata)  # dimensionality reduction via encode() (defined elsewhere)
data_arg = U4[a*7: (a+1)*7]  # the 7 daily rows of the chosen week
for i in range(7):
    print("降维后的第{}天数据".format(i), data_arg[i])

# plt.show()

# NOTE(review): U2, U3 and centroids are not defined in this fragment —
# presumably produced by an earlier PCA/K-means section not shown here.
T = U2
P = U3
num1 = num2 = 0  # votes for centroids[0] / centroids[1] over the problem data
num3 = num4 = 0
for i in range(2093):
    if euler_distance(T[i], centroids[0]) <= euler_distance(T[i], centroids[1]):
# Example #3
# 0
from predict_data import PredictData
from fill_normalization import fill, normalization
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm

data = PredictData()  # prediction data (original comment said "unknown data produce cluster centres" — likely copy-pasted)
data = fill(data, 84, 336, 12)  # 84 rows x 336 columns — TODO confirm fill()'s 4th arg
data = data.reshape(2016, 14)  # 84*336 == 2016*14: regroup into 14 feature columns
c = data[:, 1]  # single feature column used as the time series

np.random.seed(5)  # fixed seed → reproducible week choice
a = np.random.randint(0, 12)
c = c[168 * a:168 * (a + 1)]  # one week of hourly samples (168 = 7*24)

# Hourly-indexed series for the selected week.
time = pd.Series(np.array(c, dtype=float),
                 index=pd.date_range(start='2016-10-08', periods=168,
                                     freq='H'))
print(time)

time.plot()
# plt.title("column 1 data and diff data")
# plt.show()

# # ADF unit-root test to check whether the series is stationary
# t = sm.tsa.stattools.adfuller(time, )
# output = pd.DataFrame(index=['Test Statistic Value', "p-value", "Lags Used", "Number of Observations Used", "Critical Value(1%)", "Critical Value(5%)", "Critical Value(10%)"]
#                       , columns=['value'])
# output['value']['Test Statistic Value'] = t[0]
    # Build the immediate-data processor and ingest every "*_growth" folder.
    ipi = ImmediateData(fm)
    target_folders = [i for i in files if "_growth" in i]
    for t in target_folders:
        tar_file_path = file_path + t + "/"
        # print('tar_file_path:', tar_file_path)
        subfiles = os.listdir(tar_file_path)
        for s in subfiles:
            # Keep only Excel workbooks; skip Office lock files ("~$...").
            if ".xlsx" in s and "~$" not in s:
                # print('    ', s)
                fm.initialized_data(t + "/" + s, s.split('.')[0], "p")
                fm.store_yoy_growth_dataInfo(t + "/" + s, s.split('.')[0])
    ipi.run_ipi("baseCompInfo")

    # Prediction data
    print("Predict Data")
    pdd = PredictData(fm)
    tar_file_path = file_path + 'yuce/std/'
    subfiles = os.listdir(tar_file_path)
    # Forecast categories; each expects a matching workbook under yuce/std/.
    predict_categories = [
        "EPS", "profitGrowth", "netProfit", "netProfitComp", "ROE", "BPS",
        "close"
    ]
    for i in predict_categories:
        for j in subfiles:
            # Match either "<category>_*.xlsx" or "<category>.xlsx".
            if j.split("_")[0] == i or j.split(".")[0] == i:
                fm.initialized_data('yuce/std/' + j,
                                    j.split(".xlsx")[0], "p", True)
                fm.store_predict_dataInfo(j.split(".xlsx")[0], i)
    pdd.run_predict()

    fm.initialized_data("Industry/ROE.xlsx",
def split_sequences(sequences, n_steps):
    """Slice a multivariate series into supervised (window, next-row) pairs.

    Args:
        sequences: 2-D array of shape (T, F) — T time steps, F features.
        n_steps: window length; each sample is n_steps consecutive rows,
            and its target is the row immediately after the window.

    Returns:
        (X, y): arrays of shape (T - n_steps, n_steps, F) and
        (T - n_steps, F); both empty when T <= n_steps.
    """
    n_samples = len(sequences) - n_steps
    windows = [sequences[start:start + n_steps, :] for start in range(n_samples)]
    targets = [sequences[start + n_steps, :] for start in range(n_samples)]
    return np.array(windows), np.array(targets)


data = PredictData()  # project-defined loader for the prediction data set
data = fill(data, 84, 336, 12)  # 84 rows x 336 columns — TODO confirm fill()'s 4th arg
np.random.seed(5)  # fixed seed → reproducible week choice
a = np.random.randint(0, 12)
data = data[7 * a:7 * (a + 1)]  # the 7 daily rows of the chosen week
data.resize(168, 14)  # in-place reshape: 7*336 == 168*14 (168 hourly rows)

# Scale each feature column into [0, 1] (MinMaxScaler — scikit-learn,
# presumably imported elsewhere in the file; verify).
scale = MinMaxScaler(feature_range=(0, 1))
data = scale.fit_transform(data)

# choose a number of time steps (sliding-window length)
n_steps = 10
# convert into input/output samples for supervised learning
X, y = split_sequences(data, n_steps)
n_features = X.shape[2]  # features per time step
# Example #6
# 0
def exponential_smoothing(alpha, s):
    """Single (first-order) exponential smoothing, forecast form.

    Recurrence:
        S[0] = s[0]
        S[t] = alpha * s[t-1] + (1 - alpha) * S[t-1]
    i.e. each smoothed value blends the *previous* observation with the
    previous smoothed value.

    :param alpha: smoothing coefficient, expected in [0, 1]
    :param s: data sequence (list or 1-D array), must be non-empty
    :return: list of smoothed values, same length as s
    """
    smoothed = [s[0]]
    # Pair each observation s[t-1] with the latest smoothed value S[t-1].
    for observation in s[:-1]:
        smoothed.append(alpha * observation + (1 - alpha) * smoothed[-1])
    return smoothed


data = PredictData()  # project-defined loader for the prediction data set
data = fill(data, 84, 336, 12)  # 84 rows x 336 columns — TODO confirm fill()'s 4th arg
data = data.reshape(2016, 14)  # 84*336 == 2016*14: regroup into 14 feature columns
c = data[:, 1]  # single feature column used as the time series
np.random.seed(5)  # fixed seed → reproducible week choice
a = np.random.randint(0, 12)
c = c[168 * a:168 * (a + 1)]  # one week of hourly samples (168 = 7*24)
# print(c)
# print(type(c))  #numpy.ndarray
# print(c.shape)  # (168,)

# Hourly-indexed series for the selected week.
time = pd.Series(np.array(c, dtype=float),
                 index=pd.date_range(start='2018-10-08', periods=168,
                                     freq='H'))
# print(time)
# 'ds'/'y' columns — Prophet's expected input schema; presumably fed to a
# Prophet model downstream — verify against the rest of the file.
dict_time = {'ds': time.index, 'y': time.values}