def run_model(self, n, n_cluster, n_effective, step, threshold):
    """Run the cluster -> predict -> evaluate pipeline and report the Sharpe ratio.

    ``self.input_data`` is flattened and cut into three consecutive slices
    of ``round(len / 3)`` elements (the last slice takes the remainder).
    The first slice feeds ``ProcessData``, the second and third feed
    ``Prediction``, and the third is also the series given to ``Evaluation``.

    :param n: sequence of window lengths; ``max(n)`` is forwarded to ``Evaluation``
    :param n_cluster: cluster count handed to ``ProcessData``
    :param n_effective: effective-cluster count handed to ``ProcessData``
    :param step: step value forwarded to ``Evaluation``
    :param threshold: threshold forwarded to ``Evaluation`` and ``visual_account``
    :return: ``(sharpe, account[0], account[2])`` where ``account`` is what
        ``eval_result.visual_account(threshold)`` returns
    """
    flat = self.input_data.values.reshape(1, -1)[0]
    third = round(len(flat) / 3)
    cluster_prices = flat[:third]
    fit_prices = flat[third:2 * third]
    eval_prices = flat[2 * third:]

    clusters = ProcessData(
        cluster_prices, n, n_cluster, n_effective
    ).select_effective_clusters()
    predicted = Prediction(clusters, fit_prices, n, eval_prices).predict_delta_p()

    # Random arrays handed to Evaluation; drawn in this order (bench first)
    # so the random stream is consumed the same way on every run.
    bench = np.random.randn(100, 1)
    hold = np.random.randn(1, 100)
    eval_result = Evaluation(
        eval_prices, max(n), predicted, step, threshold,
        bench, hold, 100, True, 5000, 5000, 4,
    )

    # NOTE(review): periodic_return() and visual_account() are each invoked
    # twice. Evaluation is not visible from here, so the calls are kept
    # separate in case they carry side effects -- confirm before merging them.
    returns = eval_result.periodic_return()[0]
    market = eval_result.periodic_return()[1]
    sharpe = Calculate_index(returns, market, 0.05, 0.04, 1, 500, 4).sharpe_ratio()

    account_first = eval_result.visual_account(threshold)[0]
    account_third = eval_result.visual_account(threshold)[2]
    return sharpe, account_first, account_third
def run_model(self, n, n_cluster, n_effective, step, threshold, is_ultimate):
    """Run the cluster -> predict -> evaluate pipeline and report the drawdown.

    ``self.input_data`` is flattened and cut into three consecutive slices
    of ``round(len / 3)`` elements (the last slice takes the remainder).
    The first slice feeds ``ProcessData``, the second and third feed
    ``Prediction``, and the third is also the series given to ``Evaluation``.

    :param n: array of time-series (window) lengths
    :param n_cluster: int, number of clusters
    :param n_effective: number of effective clusters
    :param step: step size
    :param threshold: threshold
    :param is_ultimate: boolean; when true the correct rate is printed and
        the price/profit plot is produced (the "final result" run)
    :return: ``(drawdown, account[0], account[2])`` where ``drawdown`` comes
        from ``calculate_max_drawdown()`` and ``account`` is what
        ``visual_account(threshold)`` returns
    """
    flat = self.input_data.values.reshape(1, -1)[0]
    third = round(len(flat) / 3)
    cluster_prices = flat[:third]
    fit_prices = flat[third:2 * third]
    eval_prices = flat[2 * third:]

    clusters = ProcessData(
        cluster_prices, n, n_cluster, n_effective
    ).select_effective_clusters()
    predicted = Prediction(clusters, fit_prices, n, eval_prices).predict_delta_p()

    # Random arrays handed to Evaluation; drawn in this order (bench first)
    # so the random stream is consumed the same way on every run.
    bench = np.random.randn(100, 1)
    hold = np.random.randn(1, 100)
    eval_result = Evaluation(
        eval_prices, max(n), predicted, step, threshold,
        bench, hold, 100, True, 5000, 5000, 4,
    )
    drawdown = eval_result.calculate_max_drawdown()

    if is_ultimate:
        rate = eval_result.correct_rate()
        print("Correct rate:", rate)
        eval_result.plot_price_and_profit()

    # NOTE(review): visual_account() is invoked twice. Evaluation is not
    # visible from here, so both calls are kept in case they carry side
    # effects -- confirm before merging them.
    account_first = eval_result.visual_account(threshold)[0]
    account_third = eval_result.visual_account(threshold)[2]
    return drawdown, account_first, account_third
class Prediction:
    """Predict price changes from cluster centres via a kernel-weighted average.

    ``s`` holds, for every window length in ``n``, a one-element list whose
    single item is the matrix of cluster centres for that window (each row is
    a window of prices followed by the price change that came after it).
    ``prices`` is the series used to fit the linear weights and ``price_3``
    is the series the final predictions are made on.
    """

    def __init__(self, s, prices, n, price_3):
        self.prices = prices
        self.s = s
        self.n = n
        self.price_3 = price_3

    def bayesian_model(self, x, center):
        """Evaluate the Bayesian-model formula for one window ``x``.

        Every row of ``center`` contributes its trailing value ``y_i``
        weighted by ``exp(-0.35 * ||x - x_i||**2)``, where ``x_i`` is the
        first ``len(x)`` entries of that row.

        :param x: 1-D window of prices
        :param center: 2-D array of cluster centres with ``len(x) + 1`` columns
        :return: weighted average of the ``y_i`` values as a ``Decimal``
        """
        num = 0
        den = 0
        # Decimal with a huge exponent range keeps exp() of very negative
        # arguments from underflowing to zero.  This changes the thread's
        # decimal context, which later Decimal arithmetic in this class
        # relies on (10 significant digits).
        context = getcontext()
        context.prec = 10
        context.Emax = 99999999
        context.Emin = -99999999
        width = len(x)
        for row in center:
            x_i = row[:width]
            y_i = row[width]
            expect = np.exp(Decimal(-0.35 * norm(x - x_i) ** 2))
            num += expect * Decimal(y_i)
            den += expect
        return num / den

    def _window_estimates(self, series, i):
        """Return one ``bayesian_model`` estimate per window length, ending at ``i``."""
        return [
            self.bayesian_model(series[i - window:i], clusters[0])
            for window, clusters in zip(self.n, self.s)
        ]

    def variance_determine(self):
        """Build the regression data relating delta_p to the per-window estimates.

        For every position ``i`` from ``max(n)`` to ``len(prices) - 2`` one
        row is produced: the independent variables ``X`` are the model
        estimates for each window length and the dependent variable ``Y`` is
        the realised change ``prices[i + 1] - prices[i]``.

        :return: ``(X, Y)`` with shapes ``(rows, len(n))`` and ``(rows, 1)``
        """
        longest = max(self.n)
        last = len(self.prices) - 1
        X = np.empty((last - longest, len(self.n)))
        Y = np.empty((last - longest, 1))
        for i in range(longest, last):
            row = i - longest
            X[row, :] = [float(v) for v in self._window_estimates(self.prices, i)]
            Y[row] = self.prices[i + 1] - self.prices[i]
        return X, Y

    def find_parameters_w(self):
        """Fit a linear regression of ``Y`` on ``X`` from the second data set.

        :return: ``(w0, w1, ..., wk)``: the intercept (as returned by the
            estimator) followed by one coefficient per window length; with
            four windows this is ``(w0, w1, w2, w3, w4)``
        """
        X, Y = self.variance_determine()
        clf = linear_model.LinearRegression()
        clf.fit(X, Y)
        # Y is 2-D, so coef_ has shape (1, k); row 0 holds the k weights.
        return (clf.intercept_, *clf.coef_[0])

    def predict_delta_p(self):
        """Predict the price change at each position of ``price_3``.

        Uses the fitted weights and the linear relation
        ``dp = w0 + sum(w_k * estimate_k)``.

        :return: list of floats, one per position from ``max(n)`` to
            ``len(price_3) - 2``
        """
        w0, *weights = self.find_parameters_w()
        # The intercept may arrive as a 1-element array; pick the element
        # explicitly because float() on such arrays is deprecated in NumPy.
        intercept = Decimal(float(np.ravel(w0)[0]))
        weights = [Decimal(float(w)) for w in weights]
        delta_p = []
        for i in range(max(self.n), len(self.price_3) - 1):
            dp = intercept
            for weight, estimate in zip(weights, self._window_estimates(self.price_3, i)):
                dp += weight * estimate
            delta_p.append(float(dp))
        return delta_p


if __name__ == '__main__':
    import pandas as pd
    from bayesian_model.data_processor import ProcessData

    p1 = pd.read_csv('.//p1.csv')
    p2 = pd.read_csv('.//p2.csv')
    p3 = pd.read_csv('.//p3.csv')
    n = [90, 180, 360, 720]
    # Flatten to 1-D: ProcessData and Prediction use len() and 1-D slicing,
    # so a (1, N) array would be treated as a series of length 1.
    price_reshaped_1 = p1.values.reshape((1, -1))[0, :]
    price_reshaped_2 = p2.values.reshape((1, -1))[0, :]
    price_reshaped_3 = p3.values.reshape((1, -1))[0, :]
    data_pro = ProcessData(price_reshaped_1, n, 100, 20)
    effective = data_pro.select_effective_clusters()
    test_model = Prediction(effective, price_reshaped_2, n, price_reshaped_3)
    p = test_model.predict_delta_p()
    print(p)
# NOTE(review): the next two statements are the tail of a method whose
# beginning lies outside this view; they are reproduced unchanged.
best_w4 = total_w4 / 10
return best_w0, best_w1, best_w2, best_w3, best_w4

def error(self):
    # Builds a Prediction from this object's own data and prints the
    # predicted price changes.  Nothing is returned explicitly, so callers
    # receive None.
    pre = Prediction(self.s, self.prices, self.n, self.price_3)
    predicted_dp = pre.predict_delta_p()
    print(predicted_dp)
    # The string literal below is an inert expression statement: it holds a
    # disabled squared-error computation and is never executed.
    """ actual_dp = [0] variance = 0 for i in range(max(self.n), len(self.price_3)-1): actual_dp.append(self.price_3[i] - self.price_3[i - 1]) variance += (predicted_dp[i - max(self.n)] - actual_dp[i - max(self.n)]) ** 2 error = variance / (2 * len(predicted_dp)) return error """

if __name__ == '__main__':
    # Demo run: load three price files and flatten each one to a 1-D array.
    p1 = pd.read_csv(".//price4.csv")
    p2 = pd.read_csv(".//price5.csv")
    p3 = pd.read_csv(".//price6.csv")
    price_reshaped_1 = p1.values.reshape((1, -1))[0, :]
    price_reshaped_2 = p2.values.reshape((1, -1))[0, :]
    price_reshaped_3 = p3.values.reshape((1, -1))[0, :]
    n = [90, 180, 360, 720]
    process = ProcessData(price_reshaped_1, n, 100, 20)
    s = process.select_effective_clusters()
    temp = Train(s, price_reshaped_2, n, price_reshaped_3)
    # NOTE(review): presumably Train.error is the error() shown above; if
    # so, `result` is None and the print below shows "None" -- confirm.
    result = temp.error()
    print(result)
class ProcessData:
    """Turn a price series into windowed samples, cluster them and keep the best.

    price_data: 1-D price series
    n: sequence holding several moving-window lengths
    num_cluster: number of clusters to generate per window length
    num_effective_cluster: how many of the most effective clusters to keep
    """

    def __init__(self, price_data, n, num_cluster, num_effective_cluster):
        self.price_data = price_data
        self.n = n
        self.num_cluster = num_cluster
        self.num_effective_cluster = num_effective_cluster

    def generate_time_series(self):
        """Slice the raw prices into one sample matrix per window length.

        For window length ``w`` the matrix has ``len(price_data) - w`` rows;
        row ``j`` holds ``price_data[j:j + w]`` followed by the price change
        ``price_data[j + w] - price_data[j + w - 1]``.

        :return: list with one entry per window length; every entry is a
            one-element list wrapping that window's matrix
        :raises ValueError: if the series is shorter than a window length
        """
        total = len(self.price_data)
        list_matrix = []
        for num_n in self.n:
            num_row = total - num_n
            if num_row < 0:
                # Same exception type NumPy would raise for a negative
                # dimension, but with a message that names the real cause.
                raise ValueError(
                    "price_data has length %d, shorter than window length %d "
                    "(price_data must be a 1-D series)" % (total, num_n)
                )
            ts = np.empty((num_row, num_n + 1))
            for j in range(num_row):
                stop = j + num_n
                ts[j, :num_n] = self.price_data[j:stop]
                ts[j, num_n] = self.price_data[stop] - self.price_data[stop - 1]
            list_matrix.append([ts])
        return list_matrix

    def find_clusters(self):
        """Cluster the windowed samples of every window length with KMeans.

        :return: list with one entry per window length; every entry is a
            one-element list wrapping the fitted ``cluster_centers_`` array
        """
        list_clusters = []
        for (samples,) in self.generate_time_series():
            generate_clusters = KMeans(n_clusters=self.num_cluster,
                                       random_state=25, max_iter=666)
            generate_clusters.fit(samples)
            list_clusters.append([generate_clusters.cluster_centers_])
        return list_clusters

    def select_effective_clusters(self):
        """Keep the clusters whose values span the widest range.

        For every cluster centre the difference between its largest and
        smallest entry is computed, and the ``num_effective_cluster`` centres
        with the largest difference are kept.  The kept rows appear in
        ascending order of that difference.  Because the selection is the
        slice ``[-num_effective_cluster:]``, a value of 0 keeps every cluster.

        :return: list with one entry per window length; every entry is a
            one-element list wrapping the selected centres
        """
        list_effective_clusters = []
        for (cluster,) in self.find_clusters():
            order = np.argsort(np.ptp(cluster, axis=1))
            list_effective_clusters.append(
                [cluster[order[-self.num_effective_cluster:]]])
        return list_effective_clusters


if __name__ == '__main__':
    import pandas as pd

    p1 = pd.read_csv('.//p1.csv')
    n = [90, 180, 360, 720]
    # Flatten to 1-D: generate_time_series() uses len() and 1-D slicing, so a
    # (1, N) array would be treated as a series of length 1.
    price_reshaped_1 = p1.values.reshape((1, -1))[0, :]
    data_pro = ProcessData(price_reshaped_1, n, 100, 20)
    effective = data_pro.select_effective_clusters()
from bayesian_model.index import Calculate_index
import numpy as np
import matplotlib.pyplot as plt

# End-to-end demo: cluster the first price file, fit the prediction weights
# on the second, then predict and evaluate on the third.
# (An alternative data set is price4.csv / price5.csv / price6.csv.)
p1 = pd.read_csv('.//p1.csv')
p2 = pd.read_csv('.//p2.csv')
p3 = pd.read_csv('.//p3.csv')

n = [90, 180, 360, 720]
# Flatten every file to a 1-D price series.
price_reshaped_1 = p1.values.reshape((1, -1))[0, :]
price_reshaped_2 = p2.values.reshape((1, -1))[0, :]
price_reshaped_3 = p3.values.reshape((1, -1))[0, :]

data_pro = ProcessData(price_reshaped_1, n, 100, 20)
effective = data_pro.select_effective_clusters()
test_model = Prediction(effective, price_reshaped_2, n, price_reshaped_3)
p = test_model.predict_delta_p()
print(p)

# Realised one-step price changes of the evaluation series.
actual_dp = [
    later - earlier
    for earlier, later in zip(price_reshaped_3[:-1], price_reshaped_3[1:])
]

# The second argument is the longest window length, taken from `n` so the
# two cannot drift apart.  The variable is named `evaluation` rather than
# `eval` to avoid shadowing the builtin.
evaluation = Evaluation(price_reshaped_3, max(n), p, 2, 0.07, True, 5000, 5000, 100)
evaluation.plot_price_and_profit()
evaluation.correct_rate()
print(evaluation.sharpe_ratio())