Пример #1
0
from base_function import data_trans
import matplotlib.pyplot as plt

# Font and minus-sign settings so that Chinese labels and negative tick
# values render correctly in the figures below.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})


name_data = '000001'
path = '../data/' + name_data + '.csv'  # location of the input CSV

# Load the data, keep the Close column indexed by Date and reverse the rows.
df_01 = pd.read_csv(path, encoding='gbk')
df_data = df_01[['Date', 'Close']].set_index('Date').iloc[::-1]
df_data['Close'] = df_data['Close'].astype('float64')
data = np.array(df_data).reshape(-1)  # flatten to a 1-D (sample,) array
diff = data_trans(data)  # original note: converts to percentage data

# Figure 1: transformed series and raw series on the same axes.
plt.figure(1)
plt.plot(diff, label='差分后序列')
plt.plot(data, label='原始序列', linestyle='--')
plt.legend()

# Second figure: the transformed series on top, followed by its components.
plt.figure(figsize=(6, 9))
decomposer = EMD(diff)
imfs = decomposer.decompose()
num_imfs = imfs.shape[0]
plt.subplot(num_imfs + 1, 1, 1)
plt.plot(diff)
plt.ylabel("original")
# Only the first num_imfs - 1 rows are drawn here; the last row of `imfs`
# and the last slot of the subplot grid are left untouched by this loop.
for n, component in enumerate(imfs[:num_imfs - 1]):
    plt.subplot(num_imfs + 1, 1, n + 2)
    plt.plot(component)
    plt.ylabel("imf %i" % (n + 1))
Пример #2
0
# -*- coding: utf-8 -*-
"""
Created on =2019-07-21

@author: wenshijie
"""
# 计算序列极值点个数

import pandas as pd
import numpy as np
from base_function import data_trans
from scipy.signal import argrelmax, argrelmin


name_data = '000001'
path = '../data/' + name_data + '.csv'  # location of the input CSV
df = pd.read_csv(path, encoding='gbk')  # load the raw data
# Keep Close indexed by Date and reverse the row order (original note: dates
# then increase downwards).
df_data = df[['Date', 'Close']].set_index('Date').iloc[::-1]
df_data['Close'] = df_data['Close'].astype('float64')
# NOTE(review): `data` stays as a single-column 2-D array here, whereas the
# sibling scripts flatten it before calling data_trans -- confirm intended.
data = np.array(df_data)
diff_data = data_trans(data)

# Extrema of the original series: each count is computed once and reused
# for the total.
raw_min_count = len(argrelmin(data)[0])
print('原序列极小值点的个数:{}'.format(raw_min_count))
raw_max_count = len(argrelmax(data)[0])
print('原序列极大值点的个数:{}'.format(raw_max_count))
print('原序列极值点的个数:{}'.format(raw_min_count + raw_max_count))

# Extrema of the transformed series, reported the same way.
diff_min_count = len(argrelmin(diff_data)[0])
print('差分后序列极小值点的个数:{}'.format(diff_min_count))
diff_max_count = len(argrelmax(diff_data)[0])
print('差分后序列极大值点的个数:{}'.format(diff_max_count))
print('差分后序列极值点的个数:{}'.format(diff_min_count + diff_max_count))
Пример #3
0
# -*- coding: utf-8 -*-
"""
Created on =2019-08-26

@author: wenshijie
"""
import pandas as pd
import numpy as np
from base_function import data_trans
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import adfuller

name_data = '000001'  # data set identifier used to build the file name
path = '../data/' + name_data + '.csv'  # location of the input CSV
df = pd.read_csv(path, encoding='gbk')  # load the raw data
# Keep Close indexed by Date and reverse the row order (original note: dates
# then increase downwards).
df_data = df[['Date', 'Close']].set_index('Date').iloc[::-1]
df_data['Close'] = df_data['Close'].astype('float64')
data = np.array(df_data).reshape(-1)  # flatten to a 1-D (sample,) array
data_tf = data_trans(data)  # transformed series

# Augmented Dickey-Fuller test on the transformed series.
adf_result = adfuller(data_tf)
print(adf_result)
# Ljung-Box test on the transformed series with lags=4.
ljung_box_result = acorr_ljungbox(data_tf, lags=4)
print(ljung_box_result)

# (-13.317419484079407, 6.5606195062645e-25, 34, 6880, {'1%': -3.4313008345208873, '5%': -2.861960191315825, '10%': -2.566993663994896}, 70578.62953782696)
# (array([ 8.9800194 , 18.56779201, 32.70658048, 68.73587092]), array([2.72947750e-03, 9.29084455e-05, 3.71390617e-07, 4.19570537e-14]))
# (-20.52410113765169, 0.0, 17, 7104, {'1%': -3.4312708424082357, '5%': -2.861946939301189, '10%': -2.5669866097143914}, 57466.82649624165)
# (array([16.55581391, 26.15022647, 26.50217407, 28.54015245]), array([4.72389341e-05, 2.09676844e-06, 7.48606237e-06, 9.69227761e-06]))
Пример #4
0
    :param m:
    :return:
    """
    return list(pd.DataFrame(m).corr().iloc[-1, :])


name_data = '000001'
path = '../data/' + name_data + '.csv'  # location of the input CSV

df_01 = pd.read_csv(path, encoding='gbk')  # load the raw data
# Keep Close indexed by Date and reverse the row order.
df_data = df_01[['Date', 'Close']].set_index('Date').iloc[::-1]
df_data['Close'] = df_data['Close'].astype('float64')
data = np.array(df_data).reshape(-1)  # flatten to a 1-D (sample,) array

diff = data_trans(data)

# Earlier experiments, left disabled by the original author:
# print(theta(series_minmax(data)[0]))
# print(theta(series_minmax(diff)[0]))

# print(corrcoef_imfs(series_minmax(data)[0]))
# print(corrcoef_imfs(series_minmax(diff)[0]))

# Outputs recorded in the original comments (presumably from the first
# print below, one per data set -- TODO confirm):
# Shanghai Composite Index
# [0.9925052213533833, 0.993386133667623, 0.9942571787506765, 0.9950962804128652, 0.9960115824395269,
# 0.9969224666313158, 0.9977260830415531, 0.9984645658860706, 0.9992588904110402, 1.0]
# S&P 500
# [0.9980923986063942, 0.9982712262593705, 0.998458288931568, 0.9986495525239756, 0.9988430656437745,
# 0.9990563718109147, 0.9992787669886709, 0.9994977021953116, 0.9997361622154102, 1.0]

# Print the matrix_cor result for the raw series first, then for the
# transformed one, both built with the same second argument.
window = 10
for series in (data, diff):
    print(matrix_cor(seq_tf_matrix(series, window)))