def prepareData(code):
    """Build the shifted-price feature table for one stock.

    Loads the stock's history with ``base.getOneStockData`` and adds:

    * ``<field>__<k>`` for every field in open/close/high/low/volume and every
      ``k`` in ``range(OLD_DAYS)``: the field shifted by ``-k`` rows;
    * ``high_<k>`` for ``k`` in ``1 .. FUTURE_DAYS - 1``: ``high`` shifted by
      ``+k`` rows, and ``high_future`` as the row-wise ``max`` of those;
    * ``operate``: the result of ``calcOperate`` applied to each row.

    Returns the ``<field>__<k>`` columns plus ``operate`` with every row that
    contains a missing value removed.

    Raises ``Exception`` when the history has fewer than 300 rows.
    """
    df_all = base.getOneStockData(code)
    if df_all.shape[0] < 300:
        raise Exception('data is short.' + str(df_all.shape[0]))

    selected = []
    for offset in range(OLD_DAYS):
        for field in ('open', 'close', 'high', 'low', 'volume'):
            name = field + '__' + str(offset)
            selected.append(name)
            df_all[name] = df_all[field].shift(0 - offset)

    future_columns = ['high_' + str(step) for step in range(1, FUTURE_DAYS)]
    for step, name in enumerate(future_columns, 1):
        df_all[name] = df_all.high.shift(step)

    # 'high_future' is added to the frame (so calcOperate can see it) but is
    # deliberately not part of the returned column selection.
    df_all['high_future'] = df_all[future_columns].apply(max, axis=1)

    df_all['operate'] = df_all.apply(calcOperate, axis=1)
    selected.append('operate')

    return df_all[selected].dropna()
def checkOne(code_str):
    """Add directional-movement indicator columns for one stock and save as CSV.

    Loads the history with ``base.getOneStockData``, computes the TA-Lib
    MINUS_DM, PLUS_DM, TRANGE, MINUS_DI, PLUS_DI, ADX and ADXR series from the
    high/low/close columns, stores each one in a column suffixed with
    ``adx_timeperiod``, and writes the frame to ``ADX_DIR + code_str + '.csv'``.
    """
    df = base.getOneStockData(code_str)

    # The price columns are not modified below, so convert them only once.
    high = np.array(df['high'])
    low = np.array(df['low'])
    close = np.array(df['close'])
    suffix = str(adx_timeperiod)

    df['MINUS_DM_' + suffix] = ta.MINUS_DM(high, low,
                                           timeperiod=adx_timeperiod)
    df['PLUS_DM_' + suffix] = ta.PLUS_DM(high, low, timeperiod=adx_timeperiod)
    # TRANGE is called without a timeperiod; the column still carries the suffix.
    df['TR_' + suffix] = ta.TRANGE(high, low, close)

    period_indicators = (
        ('MINUS_DI_', ta.MINUS_DI),
        ('PLUS_DI_', ta.PLUS_DI),
        ('ADX_', ta.ADX),
        ('ADXR_', ta.ADXR),
    )
    for prefix, indicator in period_indicators:
        df[prefix + suffix] = indicator(high, low, close,
                                        timeperiod=adx_timeperiod)

    df.to_csv(ADX_DIR + code_str + '.csv')
def markVolumePrice(code):
    """Annotate one stock's history with price/volume change columns and save it.

    The frame is read from the CSV at ``getVolumePricePath(code)`` when that
    file exists, otherwise loaded with ``base.getOneStockData``.  Nothing is
    written when the frame is empty or has fewer than 100 rows.

    Columns written (in this order):

    * ``close_diff`` / ``volume_diff``: row-over-row relative change, where a
      negative change ``r`` is replaced by ``r / (1 + r)`` -- i.e. a drop is
      expressed relative to the new (lower) value instead of the old one;
    * ``close_diff_abs`` / ``volume_diff_abs``: absolute value of the above;
    * ``volume_change``: plain row-over-row difference of ``volume``.

    Returns ``None``; the result is saved to ``getVolumePricePath(code)``.
    """

    def _rescaled_change(series):
        # Vectorized replacement for the former per-row iterrows()/.loc loop:
        # same values, but it works on positions, so it stays correct even if
        # the index contains repeated labels.
        change = series.pct_change()
        return change.mask(change < 0, change / (1 + change))

    output_path = getVolumePricePath(code)
    if os.path.exists(output_path):
        df = pd.read_csv(output_path, index_col=0)
    else:
        df = base.getOneStockData(code)

    if df.empty or df.shape[0] < 100:
        return

    df['close_diff'] = _rescaled_change(df.close)
    df['close_diff_abs'] = df.close_diff.abs()

    df['volume_change'] = df.volume.diff()
    df['volume_diff'] = _rescaled_change(df.volume)
    df['volume_diff_abs'] = df.volume_diff.abs()

    df.to_csv(output_path)
def markVolumePrice(code):
    """Annotate one stock's history with price/volume change columns and save it.

    The frame is read from the CSV at ``getVolumePricePath(code)`` when that
    file exists, otherwise loaded with ``base.getOneStockData``.  Nothing is
    written when the frame is empty or has fewer than 100 rows.

    Columns written (in this order):

    * ``close_diff`` / ``volume_diff``: row-over-row relative change, where a
      negative change ``r`` is replaced by ``r / (1 + r)`` -- i.e. a drop is
      expressed relative to the new (lower) value instead of the old one;
    * ``close_diff_abs`` / ``volume_diff_abs``: absolute value of the above;
    * ``volume_change``: plain row-over-row difference of ``volume``.

    Returns ``None``; the result is saved to ``getVolumePricePath(code)``.
    """

    def _rescaled_change(series):
        # Vectorized replacement for the former per-row iterrows()/.loc loop:
        # same values, but it works on positions, so it stays correct even if
        # the index contains repeated labels.
        change = series.pct_change()
        return change.mask(change < 0, change / (1 + change))

    output_path = getVolumePricePath(code)
    if os.path.exists(output_path):
        df = pd.read_csv(output_path, index_col=0)
    else:
        df = base.getOneStockData(code)

    if df.empty or df.shape[0] < 100:
        return

    df['close_diff'] = _rescaled_change(df.close)
    df['close_diff_abs'] = df.close_diff.abs()

    df['volume_change'] = df.volume.diff()
    df['volume_diff'] = _rescaled_change(df.volume)
    df['volume_diff_abs'] = df.volume_diff.abs()

    df.to_csv(output_path)
def prepareData(code):
    """Build the shifted-price feature table for one stock.

    Loads the stock's history with ``base.getOneStockData`` and adds:

    * ``<field>__<k>`` for every field in open/close/high/low/volume and every
      ``k`` in ``range(OLD_DAYS)``: the field shifted by ``-k`` rows;
    * ``high_<k>`` for ``k`` in ``1 .. FUTURE_DAYS - 1``: ``high`` shifted by
      ``+k`` rows, and ``high_future`` as the row-wise ``max`` of those;
    * ``operate``: the result of ``calcOperate`` applied to each row.

    Returns the ``<field>__<k>`` columns plus ``operate`` with every row that
    contains a missing value removed.

    Raises ``Exception`` when the history has fewer than 300 rows.
    """
    df_all = base.getOneStockData(code)
    if df_all.shape[0] < 300:
        raise Exception('data is short.' + str(df_all.shape[0]))

    selected = []
    for offset in range(OLD_DAYS):
        for field in ('open', 'close', 'high', 'low', 'volume'):
            name = field + '__' + str(offset)
            selected.append(name)
            df_all[name] = df_all[field].shift(0 - offset)

    future_columns = ['high_' + str(step) for step in range(1, FUTURE_DAYS)]
    for step, name in enumerate(future_columns, 1):
        df_all[name] = df_all.high.shift(step)

    # 'high_future' is added to the frame (so calcOperate can see it) but is
    # deliberately not part of the returned column selection.
    df_all['high_future'] = df_all[future_columns].apply(max, axis=1)

    df_all['operate'] = df_all.apply(calcOperate, axis=1)
    selected.append('operate')

    return df_all[selected].dropna()
def checkOne(code_str):
    """Add directional-movement indicator columns for one stock and save as CSV.

    Loads the history with ``base.getOneStockData``, computes the TA-Lib
    MINUS_DM, PLUS_DM, TRANGE, MINUS_DI, PLUS_DI, ADX and ADXR series from the
    high/low/close columns, stores each one in a column suffixed with
    ``adx_timeperiod``, and writes the frame to ``ADX_DIR + code_str + '.csv'``.
    """
    df = base.getOneStockData(code_str)

    # The price columns are not modified below, so convert them only once.
    high = np.array(df['high'])
    low = np.array(df['low'])
    close = np.array(df['close'])
    suffix = str(adx_timeperiod)

    df['MINUS_DM_' + suffix] = ta.MINUS_DM(high, low,
                                           timeperiod=adx_timeperiod)
    df['PLUS_DM_' + suffix] = ta.PLUS_DM(high, low, timeperiod=adx_timeperiod)
    # TRANGE is called without a timeperiod; the column still carries the suffix.
    df['TR_' + suffix] = ta.TRANGE(high, low, close)

    period_indicators = (
        ('MINUS_DI_', ta.MINUS_DI),
        ('PLUS_DI_', ta.PLUS_DI),
        ('ADX_', ta.ADX),
        ('ADXR_', ta.ADXR),
    )
    for prefix, indicator in period_indicators:
        df[prefix + suffix] = indicator(high, low, close,
                                        timeperiod=adx_timeperiod)

    df.to_csv(ADX_DIR + code_str + '.csv')
def checkByYearMonth(code):
    """Summarise one stock's closing prices per calendar month.

    Loads the history with ``base.getOneStockData`` (and prints it), tags each
    row with ``year_month`` ('YYYY-MM') and ``month`` ('MM') parsed from the
    ``date`` column ('YYYY-MM-DD' strings), then aggregates per ``year_month``:

    * ``close_avg`` / ``close_min`` / ``close_max``: average, minimum and
      maximum close of the month;
    * ``close_first`` / ``close_last``: close at the first / last valid index;
    * ``month``: the month value at the first valid index;
    * ``asc_days`` / ``desc_days``: ``_calcAscDays`` / ``_calcDescDays`` applied
      to the month's closes.

    Returns the aggregated DataFrame, or ``None`` when the history is empty.

    Raises ``ValueError`` (from ``strptime``) if a date is not 'YYYY-MM-DD'.
    """
    df = base.getOneStockData(code)
    print(df)
    if df.empty:
        return None

    # Parse every date once and assign whole columns, instead of writing two
    # cells per row through df.loc inside an iterrows() loop.
    parsed = [datetime.datetime.strptime(text, '%Y-%m-%d')
              for text in df['date']]
    df['year_month'] = [day.strftime('%Y-%m') for day in parsed]
    df['month'] = [day.strftime('%m') for day in parsed]

    # agg() below maps one function per column, so every statistic needs its
    # own copy of 'close' to aggregate.
    for column in ('close_avg', 'close_min', 'close_max', 'close_first',
                   'close_last', 'asc_days', 'desc_days'):
        df[column] = df['close']

    return df.groupby('year_month').agg({
        'close_avg': lambda g: np.average(g),
        'close_min': lambda g: np.min(g),
        'close_max': lambda g: np.max(g),
        'close_first': lambda g: g[g.first_valid_index()],
        'close_last': lambda g: g[g.last_valid_index()],
        'month': lambda g: g[g.first_valid_index()],
        'asc_days': _calcAscDays,
        'desc_days': _calcDescDays,
    })
def checkByYearMonth(code):
    """Summarise one stock's closing prices per calendar month.

    Loads the history with ``base.getOneStockData`` (and prints it), tags each
    row with ``year_month`` ('YYYY-MM') and ``month`` ('MM') parsed from the
    ``date`` column ('YYYY-MM-DD' strings), then aggregates per ``year_month``:

    * ``close_avg`` / ``close_min`` / ``close_max``: average, minimum and
      maximum close of the month;
    * ``close_first`` / ``close_last``: close at the first / last valid index;
    * ``month``: the month value at the first valid index;
    * ``asc_days`` / ``desc_days``: ``_calcAscDays`` / ``_calcDescDays`` applied
      to the month's closes.

    Returns the aggregated DataFrame, or ``None`` when the history is empty.

    Raises ``ValueError`` (from ``strptime``) if a date is not 'YYYY-MM-DD'.
    """
    df = base.getOneStockData(code)
    print(df)
    if df.empty:
        return None

    # Parse every date once and assign whole columns, instead of writing two
    # cells per row through df.loc inside an iterrows() loop.
    parsed = [datetime.datetime.strptime(text, '%Y-%m-%d')
              for text in df['date']]
    df['year_month'] = [day.strftime('%Y-%m') for day in parsed]
    df['month'] = [day.strftime('%m') for day in parsed]

    # agg() below maps one function per column, so every statistic needs its
    # own copy of 'close' to aggregate.
    for column in ('close_avg', 'close_min', 'close_max', 'close_first',
                   'close_last', 'asc_days', 'desc_days'):
        df[column] = df['close']

    return df.groupby('year_month').agg({
        'close_avg': lambda g: np.average(g),
        'close_min': lambda g: np.min(g),
        'close_max': lambda g: np.max(g),
        'close_first': lambda g: g[g.first_valid_index()],
        'close_last': lambda g: g[g.last_valid_index()],
        'month': lambda g: g[g.first_valid_index()],
        'asc_days': _calcAscDays,
        'desc_days': _calcDescDays,
    })
def test2():
    """Forecast and plot the closing price of stock '000002'.

    Takes the non-missing ``close`` column, splits it with ``prepare_data``
    (3 lag steps, 1 forecast step, 100 test rows), prints the train/test
    shapes, runs ``make_forecasts`` and hands the last 100 closes together
    with the forecasts to ``plot_forecasts``.
    """
    n_lag, n_seq, n_test = 3, 1, 100

    series = base.getOneStockData('000002')[['close']].dropna()

    train, test = prepare_data(series, n_test, n_lag, n_seq)
    print('Train: %s, Test: %s' % (train.shape, test.shape))

    forecasts = make_forecasts(train, test, n_lag, n_seq)

    # Forecast evaluation is not run here; only the plot is produced.
    actual_tail = series.tail(n_test)
    plot_forecasts(actual_tail, forecasts, n_test + 2)
def test():
    """Fit and plot polynomial regressions of price change on volume change.

    Uses stock '000002'.  ``volume_diff`` is the row-over-row relative volume
    change (a negative change ``r`` is rescaled to ``r / (1 + r)``) and
    ``close_diff`` the row-over-row close change in percent.  Rows are kept
    only when ``-11 < close_diff < 11`` and ``abs(volume_diff) > 1``.

    The first ``count = n // 2 - 3`` rows of ``volume_diff`` train the models
    against ``close_diff`` taken one row later; the next ``count`` rows are the
    test set, again against ``close_diff`` one row later.  A linear model and
    polynomial models of degree 2, 3 and 7 are fitted, plotted, and their
    ``score`` on the test set printed.
    """
    df_all = base.getOneStockData('000002')

    # Vectorized form of the former per-row iterrows()/.loc rewrite.
    volume_diff = df_all.volume.pct_change()
    df_all['volume_diff'] = volume_diff.mask(volume_diff < 0,
                                             volume_diff / (1 + volume_diff))
    df_all['close_diff'] = df_all.close.pct_change() * 100

    df_all = df_all.dropna(subset=['volume_diff', 'close_diff'])
    df_all = df_all[df_all.close_diff < 11]
    df_all = df_all[df_all.close_diff > -11]
    df_all = df_all[abs(df_all.volume_diff) > 1]

    dfx = df_all[['volume_diff']]
    dfy = df_all[['close_diff']]
    print(df_all.shape)

    # Floor division: plain '/' yields a float on Python 3, which cannot be
    # used as a slice bound.
    count = dfx.shape[0] // 2 - 3
    X_train = dfx[:count]
    y_train = dfy[1:count + 1]
    X_test = dfx[count:count + count]
    y_test = dfy[count + 1:count + count + 1]

    runplt(X_train, y_train, X_test, y_test)
    plt.plot(X_train, y_train, 'k.')

    # Fit the linear regression and draw the fitted model.
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    yy = regressor.predict(X_test)
    # The predictions belong to X_test, so that is the x-axis (the previous
    # code plotted them against y_train).
    plt.plot(X_test, yy, 'y-')

    # (degree, line style, score label) for each polynomial model.
    polynomial_specs = (
        (2, 'r-', '2 r-squared'),
        (3, 'g', '3 r-squared'),
        (7, 'b', '7 r-squared'),
    )
    fitted = []
    for degree, line_style, label in polynomial_specs:
        featurizer = PolynomialFeatures(degree=degree)
        train_features = featurizer.fit_transform(X_train)
        test_features = featurizer.transform(X_test)
        model = LinearRegression()
        model.fit(train_features, y_train)
        plt.plot(X_test, model.predict(test_features), line_style)
        fitted.append((label, model, test_features))

    plt.plot(X_test, y_test, 'm+')
    plt.show()

    print('1 r-liner', regressor.score(X_test, y_test))
    for label, model, test_features in fitted:
        print(label, model.score(test_features, y_test))
#predictions = model.predict(X_test) #for i, prediction in enumerate(predictions): # print('Predicted: %s, Target: %s' % (prediction, y_test[i])) #print('R-squared: %.2f' % model.score(X_test, y_test)) # import base import matplotlib.pyplot as plt import pandas as pd import numpy as np from sklearn.linear_model import LinearRegression from sklearn.preprocessing import PolynomialFeatures df_all = base.getOneStockData('000002') columns = [] for i in range(1, 5): column = 'close(t-' + str(i) + ')' df_all[column] = df_all.close.shift(i) columns.append(column) df_all = df_all.dropna(subset=columns) #df_all = df_all[df_all.close_diff < 11] #df_all = df_all[df_all.close_diff > -11] #df_all = df_all[abs(df_all.volume_diff) > 1] dfx = df_all[columns] dfy = df_all[['close']] #X_train = [[6], [8], [10], [14], [18]] #y_train = [[7], [9], [13], [17.5], [18]] #X_test = [[6], [8], [11], [16]]
def calcMACD(code):
    """Replay MACD-based signals over one stock and count how they turned out.

    If the CSV at ``getMacdPath(code)`` already exists, it is read back and the
    counts are taken from its ``macd_result`` column (values > 0 and < 0)
    without recomputing anything.

    Otherwise the history is loaded with ``base.getOneStockData``; MACD, three
    moving averages of ``close`` and three moving averages of the MACD signal
    line are computed, every row from position 35 onwards is evaluated, marker
    columns (``macd_DIFF_DEA``, ``macd_DEA_K``, ``macd_MACD_SELF``,
    ``macd_SUM``, ``macd_result``) are filled in, and the frame is written to
    ``getMacdPath(code)``.

    Only rule 3 (price / DEA divergence) changes ``operate``; the ``operate``
    adjustments of rules 1 and 4 are commented out, so those rules only record
    markers.  A negative ``operate`` that follows a non-negative one is counted
    as a success when its close is above the close stored at the previous
    counted signal, otherwise as a failure.

    Returns ``(success_count, failed_count)``; ``(0, 0)`` when the history is
    empty or shorter than 35 rows.
    """
    success_count = 0
    failed_count = 0
    output_path = getMacdPath(code)
    last_operator_price_close = 0
    last_operator = 0
    # Cached result available: just re-count the stored outcomes.
    if os.path.exists(output_path):
        df = pd.read_csv(output_path, index_col=0)
        success_count = df.macd_result[df.macd_result>0].count()
        failed_count = df.macd_result[df.macd_result<0].count()
        return (success_count,failed_count)
    df = base.getOneStockData(code)
    if df.empty:
        return (success_count,failed_count)
    dflen = df.shape[0]
    if dflen<35:
        return (success_count,failed_count)
    # Create the marker columns up front (an empty Series yields all-missing values).
    df['macd_DIFF_DEA'] = pd.Series()
    df['macd_DEA_K'] = pd.Series()
    df['macd_MACD_SELF'] = pd.Series()
    df['macd_SUM'] = pd.Series()
    df['macd_result'] = pd.Series()
    macd, macdsignal, macdhist = ta.MACD(np.array(df['close']), fastperiod=MACD_FASTPERIOD, slowperiod=MACD_SLOWPERIOD, signalperiod=MACD_SIGNALPERIOD)
    # Each *_index records the positional column number of the column appended
    # right after it (df.shape[1] before the append); df.iat uses them below.
    MA5_index = df.shape[1]
    df['MA' + str(MA_FAST)]=pd.Series(ta.MA(np.array(df['close']), timeperiod=MA_FAST, matype=0),index=df.index)
    MA10_index = df.shape[1]
    df['MA' + str(MA_MIDDLE)]=pd.Series(ta.MA(np.array(df['close']), timeperiod=MA_MIDDLE, matype=0),index=df.index)
    MA20_index = df.shape[1]
    df['MA' + str(MA_SLOW)]=pd.Series(ta.MA(np.array(df['close']), timeperiod=MA_SLOW, matype=0),index=df.index)
    macd_index = df.shape[1]
    df['macd']=pd.Series(macd,index=df.index) # DIFF
    macdsignal_index = df.shape[1]
    df['macdsignal']=pd.Series(macdsignal,index=df.index) # DEA
    macdhist_index = df.shape[1]
    df['macdhist']=pd.Series(macdhist,index=df.index) # DIFF-DEA
    # Moving averages of the signal (DEA) line, used by rule 3.
    SignalMA5 = ta.MA(macdsignal, timeperiod=MA_FAST, matype=0)
    SignalMA10 = ta.MA(macdsignal, timeperiod=MA_MIDDLE, matype=0)
    SignalMA20 = ta.MA(macdsignal, timeperiod=MA_SLOW, matype=0)
    # dflen is reused as "number of rows seen so far"; dflen - 1 is the
    # position of the row being evaluated.
    # NOTE(review): df.iat[...] addresses rows by position while
    # df.loc[dflen - 1, ...] and df['close'][dflen - 1] address them by label;
    # these agree only if the index is 0..n-1 -- confirm.
    for dflen in range(36, df.shape[0] + 1):
        operate = 0
        # Three columns were appended above, corresponding to DIFF, DEA and DIFF-DEA.
        MAlen = dflen
        # Two arrays. 1. DIFF and DEA both positive and DIFF crosses above DEA: buy signal.
        # 2. DIFF and DEA both negative and DIFF crosses below DEA: sell signal.
        # TODO: to be revised (note from the original author).
        # NOTE(review): the sell branch tests current DIFF == previous DEA, not
        # a downward cross as the description above says.
        if df.iat[(dflen-1),macd_index]>0:
            if df.iat[(dflen-1),macdsignal_index]>0:
                if df.iat[(dflen-1),macd_index]>df.iat[(dflen-1),macdsignal_index] and df.iat[(dflen-2),macd_index]<=df.iat[(dflen-2),macdsignal_index]:
                    # operate = operate + 10  # buy
                    df.loc[dflen - 1, 'macd_DIFF_DEA'] = 1
        else:
            if df.iat[(dflen-1),macdsignal_index]<0:
                if df.iat[(dflen-1),macd_index] == df.iat[(dflen-2),macdsignal_index]:
                    # operate = operate - 10  # sell
                    df.loc[dflen - 1, 'macd_DIFF_DEA'] = -1
        # 3. The DEA line diverges from the K-line (price): trend-reversal signal.
        if df.iat[(dflen-1),MA5_index]>=df.iat[(dflen-1),MA10_index] and df.iat[(dflen-1),MA10_index]>=df.iat[(dflen-1),MA20_index]: # K-line rising
            if SignalMA5[MAlen-1]<=SignalMA10[MAlen-1] and SignalMA10[MAlen-1]<=SignalMA20[MAlen-1]: # DEA falling
                operate = operate - 1
                df.loc[dflen - 1, 'macd_DEA_K'] = 1
        elif df.iat[(dflen-1),MA5_index]<=df.iat[(dflen-1),MA10_index] and df.iat[(dflen-1),MA10_index]<=df.iat[(dflen-1),MA20_index]: # K-line falling
            if SignalMA5[MAlen-1]>=SignalMA10[MAlen-1] and SignalMA10[MAlen-1]>=SignalMA20[MAlen-1]: # DEA rising
                operate = operate + 1
                df.loc[dflen - 1, 'macd_DEA_K'] = -1
        # 4. MACD histogram turning from negative to positive: buy signal.
        # (dflen > 30 always holds here because the loop starts at 36.)
        if df.iat[(dflen-1),macdhist_index]>0 and dflen >30 :
            # Look back over the previous 25 rows for a non-positive bar.
            for i in range(1,26):
                if df.iat[(dflen-1-i),macdhist_index]<=0:
                    # operate = operate + 5
                    df.loc[dflen - 1, 'macd_MACD_SELF'] = 1
                    break
        # Turning from positive to negative: sell signal.
        if df.iat[(dflen-1),macdhist_index]<0 and dflen >30 :
            for i in range(1,26):
                if df.iat[(dflen-1-i),macdhist_index]>=0:
                    # operate = operate - 5
                    df.loc[dflen - 1, 'macd_MACD_SELF'] = -1
                    break
        if operate != 0:
            df.loc[dflen - 1, 'macd_SUM'] = operate
        # No signal on this row: nothing to evaluate.
        if operate == 0:
            continue
        cur_operator_price_close = df['close'][dflen - 1]
        # Same direction as the last counted signal: ignore the repeat.
        if last_operator * operate > 0:
            continue
        # print code, name, 'operate=', operate, ' last_operator=', last_operator,
        # print ' cur_operator_price_close=', cur_operator_price_close,
        # print ' last_operator_price_close=', last_operator_price_close
        if operate > 0:
            # print 'operate=buy price=',cur_operator_price_close
            pass
        else:
            # print 'operate=sell ',
            # print ' success=', (cur_operator_price_close > last_operator_price_close)
            # print ' curprice=',cur_operator_price_close,
            # print ' last_operator_price_close=', last_operator_price_close
            # A stored close of 0 means there was no earlier signal to compare with.
            if last_operator_price_close != 0:
                if cur_operator_price_close > last_operator_price_close:
                    success_count += 1
                    df.loc[dflen - 1, 'macd_result'] = 1
                else:
                    failed_count += 1
                    df.loc[dflen - 1, 'macd_result'] = -1
        # Remember this signal as the reference for the next one.
        last_operator = operate
        last_operator_price_close = cur_operator_price_close
    df.to_csv(output_path)
    return (success_count,failed_count)
def calcMACD(code):
    """Replay MACD-based signals over one stock and count how they turned out.

    If the CSV at ``getMacdPath(code)`` already exists, it is read back and the
    counts are taken from its ``macd_result`` column (values > 0 and < 0)
    without recomputing anything.

    Otherwise the history is loaded with ``base.getOneStockData``; MACD, three
    moving averages of ``close`` and three moving averages of the MACD signal
    line are computed, every row from position 35 onwards is evaluated, marker
    columns (``macd_DIFF_DEA``, ``macd_DEA_K``, ``macd_MACD_SELF``,
    ``macd_SUM``, ``macd_result``) are filled in, and the frame is written to
    ``getMacdPath(code)``.

    Only rule 3 (price / DEA divergence) changes ``operate``; the ``operate``
    adjustments of rules 1 and 4 are commented out, so those rules only record
    markers.  A negative ``operate`` that follows a non-negative one is counted
    as a success when its close is above the close stored at the previous
    counted signal, otherwise as a failure.

    Returns ``(success_count, failed_count)``; ``(0, 0)`` when the history is
    empty or shorter than 35 rows.
    """
    success_count = 0
    failed_count = 0
    output_path = getMacdPath(code)
    last_operator_price_close = 0
    last_operator = 0
    # Cached result available: just re-count the stored outcomes.
    if os.path.exists(output_path):
        df = pd.read_csv(output_path, index_col=0)
        success_count = df.macd_result[df.macd_result > 0].count()
        failed_count = df.macd_result[df.macd_result < 0].count()
        return (success_count, failed_count)
    df = base.getOneStockData(code)
    if df.empty:
        return (success_count, failed_count)
    dflen = df.shape[0]
    if dflen < 35:
        return (success_count, failed_count)
    # Create the marker columns up front (an empty Series yields all-missing values).
    df['macd_DIFF_DEA'] = pd.Series()
    df['macd_DEA_K'] = pd.Series()
    df['macd_MACD_SELF'] = pd.Series()
    df['macd_SUM'] = pd.Series()
    df['macd_result'] = pd.Series()
    macd, macdsignal, macdhist = ta.MACD(np.array(df['close']),
                                        fastperiod=MACD_FASTPERIOD,
                                        slowperiod=MACD_SLOWPERIOD,
                                        signalperiod=MACD_SIGNALPERIOD)
    # Each *_index records the positional column number of the column appended
    # right after it (df.shape[1] before the append); df.iat uses them below.
    MA5_index = df.shape[1]
    df['MA' + str(MA_FAST)] = pd.Series(ta.MA(np.array(df['close']),
                                              timeperiod=MA_FAST,
                                              matype=0),
                                        index=df.index)
    MA10_index = df.shape[1]
    df['MA' + str(MA_MIDDLE)] = pd.Series(ta.MA(np.array(df['close']),
                                                timeperiod=MA_MIDDLE,
                                                matype=0),
                                          index=df.index)
    MA20_index = df.shape[1]
    df['MA' + str(MA_SLOW)] = pd.Series(ta.MA(np.array(df['close']),
                                              timeperiod=MA_SLOW,
                                              matype=0),
                                        index=df.index)
    macd_index = df.shape[1]
    df['macd'] = pd.Series(macd, index=df.index)  # DIFF
    macdsignal_index = df.shape[1]
    df['macdsignal'] = pd.Series(macdsignal, index=df.index)  # DEA
    macdhist_index = df.shape[1]
    df['macdhist'] = pd.Series(macdhist, index=df.index)  # DIFF-DEA
    # Moving averages of the signal (DEA) line, used by rule 3.
    SignalMA5 = ta.MA(macdsignal, timeperiod=MA_FAST, matype=0)
    SignalMA10 = ta.MA(macdsignal, timeperiod=MA_MIDDLE, matype=0)
    SignalMA20 = ta.MA(macdsignal, timeperiod=MA_SLOW, matype=0)
    # dflen is reused as "number of rows seen so far"; dflen - 1 is the
    # position of the row being evaluated.
    # NOTE(review): df.iat[...] addresses rows by position while
    # df.loc[dflen - 1, ...] and df['close'][dflen - 1] address them by label;
    # these agree only if the index is 0..n-1 -- confirm.
    for dflen in range(36, df.shape[0] + 1):
        operate = 0
        # Three columns were appended above, corresponding to DIFF, DEA and DIFF-DEA.
        MAlen = dflen
        # Two arrays. 1. DIFF and DEA both positive and DIFF crosses above DEA: buy signal.
        # 2. DIFF and DEA both negative and DIFF crosses below DEA: sell signal.
        # TODO: to be revised (note from the original author).
        # NOTE(review): the sell branch tests current DIFF == previous DEA, not
        # a downward cross as the description above says.
        if df.iat[(dflen - 1), macd_index] > 0:
            if df.iat[(dflen - 1), macdsignal_index] > 0:
                if df.iat[(dflen - 1), macd_index] > df.iat[
                        (dflen - 1), macdsignal_index] and df.iat[
                            (dflen - 2), macd_index] <= df.iat[
                                (dflen - 2), macdsignal_index]:
                    # operate = operate + 10  # buy
                    df.loc[dflen - 1, 'macd_DIFF_DEA'] = 1
        else:
            if df.iat[(dflen - 1), macdsignal_index] < 0:
                if df.iat[(dflen - 1), macd_index] == df.iat[
                        (dflen - 2), macdsignal_index]:
                    # operate = operate - 10  # sell
                    df.loc[dflen - 1, 'macd_DIFF_DEA'] = -1
        # 3. The DEA line diverges from the K-line (price): trend-reversal signal.
        if df.iat[(dflen - 1), MA5_index] >= df.iat[
                (dflen - 1), MA10_index] and df.iat[
                    (dflen - 1), MA10_index] >= df.iat[
                        (dflen - 1), MA20_index]:  # K-line rising
            if SignalMA5[MAlen - 1] <= SignalMA10[MAlen - 1] and SignalMA10[
                    MAlen - 1] <= SignalMA20[MAlen - 1]:  # DEA falling
                operate = operate - 1
                df.loc[dflen - 1, 'macd_DEA_K'] = 1
        elif df.iat[(dflen - 1), MA5_index] <= df.iat[
                (dflen - 1), MA10_index] and df.iat[
                    (dflen - 1), MA10_index] <= df.iat[
                        (dflen - 1), MA20_index]:  # K-line falling
            if SignalMA5[MAlen - 1] >= SignalMA10[MAlen - 1] and SignalMA10[
                    MAlen - 1] >= SignalMA20[MAlen - 1]:  # DEA rising
                operate = operate + 1
                df.loc[dflen - 1, 'macd_DEA_K'] = -1
        # 4. MACD histogram turning from negative to positive: buy signal.
        # (dflen > 30 always holds here because the loop starts at 36.)
        if df.iat[(dflen - 1), macdhist_index] > 0 and dflen > 30:
            # Look back over the previous 25 rows for a non-positive bar.
            for i in range(1, 26):
                if df.iat[(dflen - 1 - i), macdhist_index] <= 0:
                    # operate = operate + 5
                    df.loc[dflen - 1, 'macd_MACD_SELF'] = 1
                    break
        # Turning from positive to negative: sell signal.
        if df.iat[(dflen - 1), macdhist_index] < 0 and dflen > 30:
            for i in range(1, 26):
                if df.iat[(dflen - 1 - i), macdhist_index] >= 0:
                    # operate = operate - 5
                    df.loc[dflen - 1, 'macd_MACD_SELF'] = -1
                    break
        if operate != 0:
            df.loc[dflen - 1, 'macd_SUM'] = operate
        # No signal on this row: nothing to evaluate.
        if operate == 0:
            continue
        cur_operator_price_close = df['close'][dflen - 1]
        # Same direction as the last counted signal: ignore the repeat.
        if last_operator * operate > 0:
            continue
        # print code, name, 'operate=', operate, ' last_operator=', last_operator,
        # print ' cur_operator_price_close=', cur_operator_price_close,
        # print ' last_operator_price_close=', last_operator_price_close
        if operate > 0:
            # print 'operate=buy price=',cur_operator_price_close
            pass
        else:
            # print 'operate=sell ',
            # print ' success=', (cur_operator_price_close > last_operator_price_close)
            # print ' curprice=',cur_operator_price_close,
            # print ' last_operator_price_close=', last_operator_price_close
            # A stored close of 0 means there was no earlier signal to compare with.
            if last_operator_price_close != 0:
                if cur_operator_price_close > last_operator_price_close:
                    success_count += 1
                    df.loc[dflen - 1, 'macd_result'] = 1
                else:
                    failed_count += 1
                    df.loc[dflen - 1, 'macd_result'] = -1
        # Remember this signal as the reference for the next one.
        last_operator = operate
        last_operator_price_close = cur_operator_price_close
    df.to_csv(output_path)
    return (success_count, failed_count)