from base_function import data_trans
import matplotlib.pyplot as plt

# Plot the raw and transformed closing-price series, then the components that
# EMD extracts from the transformed series.
# NOTE(review): pd, np and EMD are used below but are not imported in the
# visible part of this file - confirm they are imported above.

# Font settings so that the Chinese legend labels and the minus sign render.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font used to display Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # make the minus sign display correctly

name_data = '000001'
path = '../data/{}.csv'.format(name_data)  # location of the data file
df_01 = pd.read_csv(path, encoding='gbk')  # read the data

# Keep Date/Close, index by Date, then reverse the row order (the original
# comment describes this as arranging the data by date).
close_by_date = df_01[['Date', 'Close']].set_index('Date')
df_data = close_by_date.iloc[::-1]
df_data['Close'] = df_data['Close'].astype('float64')

# Flatten the single-column frame into a 1-D array of shape (sample,).
data = np.array(df_data).reshape(len(df_data))

# Per the original comment, data_trans converts the series to percentage data;
# it is defined in base_function, outside this file.
diff = data_trans(data)

# Figure 1: transformed series (solid) against the raw series (dashed).
plt.figure(1)
plt.plot(diff, label='差分后序列')
plt.plot(data, linestyle='--', label='原始序列')
plt.legend()

# Second figure: one subplot row for the input plus one row per component.
plt.figure(figsize=(6, 9))
decomposer = EMD(diff)
imfs = decomposer.decompose()
num_imfs = imfs.shape[0]

plt.subplot(num_imfs + 1, 1, 1)
plt.plot(diff)
plt.ylabel("original")

# NOTE(review): only the first num_imfs-1 components are drawn here, so the
# last component (imfs[-1]) and the final subplot row are left untouched by
# this loop - confirm whether that row is filled in elsewhere.
for n, component in enumerate(imfs[:num_imfs - 1]):
    plt.subplot(num_imfs + 1, 1, n + 2)
    plt.plot(component)
    plt.ylabel("imf %i" % (n + 1))
# -*- coding: utf-8 -*-
"""
Count the local extrema of a closing-price series, before and after the
data_trans transformation.

Created on 2019-07-21
@author: wenshijie
"""
import numpy as np
import pandas as pd
from scipy.signal import argrelmax, argrelmin

from base_function import data_trans


def count_extrema(series):
    """Return ``(n_min, n_max)`` for ``series``.

    :param series: array searched along its first axis.
    :return: tuple with the number of relative minima and relative maxima
        reported by ``scipy.signal.argrelmin`` / ``argrelmax``.
    """
    # Each search returns a tuple of index arrays; the first one holds the
    # positions along axis 0, so its length is the number of extrema.
    n_min = len(argrelmin(series)[0])
    n_max = len(argrelmax(series)[0])
    return n_min, n_max


name_data = '000001'
path = '../data/' + name_data + '.csv'  # location of the data file
df = pd.read_csv(path, encoding='gbk')  # read the data

# Keep Date/Close, index by Date and reverse the rows; the original comment
# says this leaves the dates increasing downwards.
df_data = df[['Date', 'Close']].set_index('Date').iloc[::-1]
df_data['Close'] = df_data['Close'].astype('float64')

data = np.array(df_data)
# Flatten to a 1-D (sample,) array before transforming, as the sibling
# scripts do, so data_trans receives the same input shape everywhere.
data = np.reshape(data, (len(data),))
diff_data = data_trans(data)

# Extrema of the original series (each search is run once and reused).
raw_min, raw_max = count_extrema(data)
print('原序列极小值点的个数:{}'.format(raw_min))
print('原序列极大值点的个数:{}'.format(raw_max))
print('原序列极值点的个数:{}'.format(raw_min + raw_max))

# Extrema of the transformed series.
diff_min, diff_max = count_extrema(diff_data)
print('差分后序列极小值点的个数:{}'.format(diff_min))
print('差分后序列极大值点的个数:{}'.format(diff_max))
print('差分后序列极值点的个数:{}'.format(diff_min + diff_max))
# -*- coding: utf-8 -*-
"""
Run the augmented Dickey-Fuller and Ljung-Box tests on the transformed
closing-price series and print the results.

Created on =2019-08-26
@author: wenshijie
"""
import pandas as pd
import numpy as np
from base_function import data_trans
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import adfuller

name_data = '000001'  # data set identifier
path = '../data/{}.csv'.format(name_data)  # location of the data file
df = pd.read_csv(path, encoding='gbk')  # read the data

# Keep Date/Close, index by Date and reverse the rows; the original comment
# says this leaves the dates increasing downwards.
date_indexed = df[['Date', 'Close']].set_index('Date')
df_data = date_indexed.iloc[::-1]
df_data['Close'] = df_data['Close'].astype('float64')

# Flatten the single-column frame into a 1-D array of shape (sample,).
data = np.array(df_data).reshape(len(df_data))
data_tf = data_trans(data)  # transformed series

adf_result = adfuller(data_tf)
print(adf_result)

ljung_box_result = acorr_ljungbox(data_tf, lags=4)
print(ljung_box_result)

# Recorded output from earlier runs, kept for reference. Each pair is the
# adfuller result followed by the acorr_ljungbox result.
# NOTE(review): the original comments do not say which data set produced
# which pair - confirm before relying on them.
#
# (-13.317419484079407, 6.5606195062645e-25, 34, 6880,
#  {'1%': -3.4313008345208873, '5%': -2.861960191315825,
#   '10%': -2.566993663994896}, 70578.62953782696)
# (array([ 8.9800194 , 18.56779201, 32.70658048, 68.73587092]),
#  array([2.72947750e-03, 9.29084455e-05, 3.71390617e-07, 4.19570537e-14]))
#
# (-20.52410113765169, 0.0, 17, 7104,
#  {'1%': -3.4312708424082357, '5%': -2.861946939301189,
#   '10%': -2.5669866097143914}, 57466.82649624165)
# (array([16.55581391, 26.15022647, 26.50217407, 28.54015245]),
#  array([4.72389341e-05, 2.09676844e-06, 7.48606237e-06, 9.69227761e-06]))
:param m: :return: """ return list(pd.DataFrame(m).corr().iloc[-1, :]) name_data = '000001' path = '../data/' + name_data + '.csv' # 数据的地址 df_01 = pd.read_csv(path, encoding='gbk') # 读取数据 df_data = df_01[['Date', 'Close']].set_index('Date').iloc[::-1] # 把数据按日期排列 df_data['Close'] = df_data['Close'].astype('float64') data = np.array(df_data) data = np.reshape(data, (len(data), )) # 转换成(sample,)np.array diff = data_trans(data) # print(theta(series_minmax(data)[0])) # print(theta(series_minmax(diff)[0])) # print(corrcoef_imfs(series_minmax(data)[0])) # print(corrcoef_imfs(series_minmax(diff)[0])) print(matrix_cor(seq_tf_matrix(data, 10))) # 上证指数 # [0.9925052213533833, 0.993386133667623, 0.9942571787506765, 0.9950962804128652, 0.9960115824395269, # 0.9969224666313158, 0.9977260830415531, 0.9984645658860706, 0.9992588904110402, 1.0] # 标普500 # [0.9980923986063942, 0.9982712262593705, 0.998458288931568, 0.9986495525239756, 0.9988430656437745, # 0.9990563718109147, 0.9992787669886709, 0.9994977021953116, 0.9997361622154102, 1.0] print(matrix_cor(seq_tf_matrix(diff, 10)))