def computeRunningMinMaxSignals(df, ema_feats_df, spike_5s, norm):
    """Add dislocation features measured from the running extreme and reversal.

    The running extreme of ``df['microprice_ema_200ms_ticks']`` is the
    expanding maximum when ``spike_5s >= 0`` and the expanding minimum
    otherwise.  The running reversal level is that extreme plus the largest
    move away from it in the opposite direction seen so far.

    For each of the 200ms / 2000ms / 10000ms EMA columns of ``df`` a
    ``from_max_disl_ema_<w>_ticks`` and a ``from_max_rev_ema_<w>_ticks``
    column is written into ``ema_feats_df`` via ``computeRelativeDislocation``.

    ``norm`` is accepted for interface compatibility and is not used here.

    Returns the tuple ``(ema_feats_df, df)``.
    """
    base = df['microprice_ema_200ms_ticks']
    spike_up = spike_5s >= 0

    # Series.expanding() replaces pd.expanding_max / pd.expanding_min, which
    # no longer exist in current pandas.
    if spike_up:
        max_disl = base.expanding().max()
    else:
        max_disl = base.expanding().min()

    rev_from_max_disl_to_here = base - max_disl
    if spike_up:
        max_rev = max_disl + rev_from_max_disl_to_here.expanding().min()
    else:
        max_rev = max_disl + rev_from_max_disl_to_here.expanding().max()

    # Column insertion order matches the original: all "disl" columns first,
    # then all "rev" columns.
    for label, reference in (('disl', max_disl), ('rev', max_rev)):
        for window in ('200ms', '2000ms', '10000ms'):
            target = 'from_max_%s_ema_%s_ticks' % (label, window)
            source = 'microprice_ema_%s_ticks' % window
            ema_feats_df[target] = computeRelativeDislocation(
                df[source], reference, reference)

    return (ema_feats_df, df)
def update_price(self, date, sid):
    """Build 5-minute and 1-minute bar fields for one instrument-day.

    Reads the ``price`` document for ``(sid, date)`` from ``self.collection``,
    converts its millisecond-offset keys into timestamps on ``date``
    (``'%Y%m%d'``), resamples into right-closed / right-labelled bars and
    upserts one document per bar field into ``self.db.IF_5min`` and
    ``self.db.IF_1min``.

    Fields written: ``into/inth/intl/intc`` (bar OHLC), ``lstl``/``lsth``
    (running low/high at bar end), ``vlty`` (in-bar standard deviation),
    ``lstp`` (last price) and, for the 5-minute bars only, ``tvwp`` (in-bar
    mean).  Returns ``None``; returns early if the price cannot be loaded.
    """

    def _bars(prices, rule, with_mean):
        # One place builds both granularities, so the field list cannot drift.
        def _res(series):
            return series.resample(rule, label='right', closed='right')

        bars = _res(prices).ohlc()
        bars.columns = ['into', 'inth', 'intl', 'intc']
        bars['lstl'] = _res(prices.expanding().min()).last()
        # The original 1-minute branch assigned the running high to 'lstl'
        # again, clobbering the low and never producing 'lsth'.
        bars['lsth'] = _res(prices.expanding().max()).last()
        bars['vlty'] = _res(prices).std()
        bars['lstp'] = _res(prices).last()
        if with_mean:
            bars['tvwp'] = _res(prices).mean()
        bars.index = [dt.strftime('%H%M%S') for dt in bars.index]
        return bars

    try:
        p = pd.Series(self.collection.find_one(
            {'sid': sid, 'date': date, 'dname': 'price'})['dvalue'])
    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt/SystemExit.
        # NOTE(review): '{}' placeholders only work with a brace-style logger;
        # with stdlib logging this should be '%s' - confirm which one it is.
        self.logger.warning('No price found for {} on {}', sid, date)
        return

    day_start = datetime.strptime(date, '%Y%m%d')
    p.index = [day_start + timedelta(milliseconds=int(s)) for s in p.index]

    # NOTE(review): the 1-minute bars never had 'tvwp'; kept that way.
    df5 = _bars(p, '5min', True).reindex(times_5min)
    for dname in df5.columns:
        self.db.IF_5min.update(
            {'sid': sid, 'date': date, 'dname': dname},
            {'$set': {'dvalue': df5[dname].to_dict()}}, upsert=True)

    df1 = _bars(p, '1min', False).reindex(times_1min)
    for dname in df1.columns:
        self.db.IF_1min.update(
            {'sid': sid, 'date': date, 'dname': dname},
            {'$set': {'dvalue': df1[dname].to_dict()}}, upsert=True)
def indicator_KDJ(stock_data):
    """Compute KDJ, mark crossovers and return the golden-cross rows.

    ``stock_data`` needs ``high``, ``low`` and ``close`` columns and is
    modified in place: the KDJ columns, the crossover column and the forward
    return columns are added, and rows containing any NaN are dropped.

    Returns the rows whose crossover column equals ``'金叉'`` (K crossing
    above D), or ``None`` when there are none.
    """
    # 9-period rolling extremes; the first 8 values are NaN and are filled
    # with the running extreme seen so far.
    low_list = stock_data['low'].rolling(9).min()
    low_list = low_list.fillna(stock_data['low'].expanding().min())
    high_list = stock_data['high'].rolling(9).max()
    high_list = high_list.fillna(stock_data['high'].expanding().max())
    rsv = (stock_data['close'] - low_list) / (high_list - low_list) * 100

    stock_data['KDJ_K'] = rsv.ewm(com=2, adjust=False).mean()
    stock_data['KDJ_D'] = stock_data['KDJ_K'].ewm(com=2, adjust=False).mean()
    stock_data['KDJ_J'] = 3 * stock_data['KDJ_K'] - 2 * stock_data['KDJ_D']

    # Crossovers: compare today's K > D state with yesterday's.
    stock_data['KDJ_金叉死叉'] = ''
    kdj_position = stock_data['KDJ_K'] > stock_data['KDJ_D']
    previous = kdj_position.shift()
    # The first shifted value is NaN, so compare explicitly instead of negating.
    golden = kdj_position & previous.eq(False)
    dead = ~kdj_position & previous.eq(True)
    stock_data.loc[golden, 'KDJ_金叉死叉'] = '金叉'
    stock_data.loc[dead, 'KDJ_金叉死叉'] = '死叉'

    # Forward returns over the next n trading days, from the close price.
    for n in [1, 2, 3, 5, 10, 20]:
        stock_data['接下来' + str(n) + '个交易日涨跌幅'] = (
            stock_data['close'].shift(-1 * n) / stock_data['close'] - 1.0)
    stock_data.dropna(how='any', inplace=True)

    stock_data = stock_data[stock_data['KDJ_金叉死叉'] == '金叉']
    if stock_data.empty:
        return None
    return stock_data
def get_kdj(code):
    """Fetch K-line data for ``code`` and flag a rising-J / low-K setup.

    KDJ (9-period, ``com=2`` smoothing) is computed over the whole history
    returned by ``ts.get_k_data``.  ``kdj_ok`` is 1 when the latest J is above
    the previous J and the latest K is below 20, otherwise 0.

    Returns the rows dated after ``dc.get_the_day_before_today(1)`` with the
    ``kdj_k``, ``kdj_d``, ``kdj_j`` and ``kdj_ok`` columns added.
    """
    stock_data = ts.get_k_data(code)

    low_list = stock_data['low'].rolling(9).min()
    low_list = low_list.fillna(stock_data['low'].expanding().min())
    high_list = stock_data['high'].rolling(9).max()
    high_list = high_list.fillna(stock_data['high'].expanding().max())
    rsv = (stock_data['close'] - low_list) / (high_list - low_list) * 100

    stock_data['kdj_k'] = rsv.ewm(com=2).mean()
    stock_data['kdj_d'] = stock_data['kdj_k'].ewm(com=2).mean()
    stock_data['kdj_j'] = 3 * stock_data['kdj_k'] - 2 * stock_data['kdj_d']

    # Positional access: the original looked rows up by the label "size - 1",
    # which only works when the index is exactly 0..n-1.
    kdj_j = stock_data['kdj_j']
    kdj_k = stock_data['kdj_k']
    kdj_ok = 0
    if kdj_j.size >= 2:  # need both "today" and "yesterday"
        todayJ = kdj_j.iloc[-1]
        yesterdayJ = kdj_j.iloc[-2]
        todayK = kdj_k.iloc[-1]
        if todayJ > yesterdayJ and todayK < 20.0:
            kdj_ok = 1

    # Copy so the new column is not written onto a slice of the full frame.
    recent = stock_data[
        stock_data.date > str(dc.get_the_day_before_today(1))].copy()
    recent['kdj_ok'] = kdj_ok
    return recent
def KDJ(date, N=9, M1=3, M2=3):
    """Add ``KDJ_K``, ``KDJ_D`` and ``KDJ_J`` columns to ``date`` in place.

    :param date: DataFrame with ``high``, ``low`` and ``close`` columns
        (the parameter name is historical; it is the price frame).
    :param N: look-back window for the rolling high/low.
    :param M1: smoothing period for K; used as ``com = M1 - 1``, so the
        default of 3 reproduces the previously hard-coded ``com=2``.
    :param M2: smoothing period for D, used the same way.

    Returns ``None``; the result is written into ``date``.
    """
    # The original indexed with the bare names ``low`` / ``high`` (NameError);
    # the column labels are the strings 'low' / 'high'.
    low_list = date['low'].rolling(N).min()
    low_list = low_list.fillna(date['low'].expanding().min())
    high_list = date['high'].rolling(N).max()
    high_list = high_list.fillna(date['high'].expanding().max())
    rsv = (date['close'] - low_list) / (high_list - low_list) * 100

    date['KDJ_K'] = rsv.ewm(com=M1 - 1).mean()
    date['KDJ_D'] = date['KDJ_K'].ewm(com=M2 - 1).mean()
    date['KDJ_J'] = 3 * date['KDJ_K'] - 2 * date['KDJ_D']
def kdj(data, date, m1, m2):
    """Add KDJ working columns to ``data`` in place.

    :param data: DataFrame with ``high``, ``low`` and ``close`` columns.
    :param date: look-back window length for the rolling high/low
        (the name is historical; it is a number of rows).
    :param m1: ``com`` of the exponential average that turns RSV into K.
    :param m2: ``com`` of the exponential average that turns K into D.

    Columns written: ``lown``, ``highn``, ``rsv``, ``kdj_k``, ``kdj_d``,
    ``kdj_j``.  Returns ``None``.
    """
    low = data['low']
    high = data['high']

    # Assign the filled series directly; the original filled through
    # attribute access with inplace=True, which is not guaranteed to write
    # back into the frame.
    data['lown'] = low.rolling(date).min().fillna(low.expanding().min())
    data['highn'] = high.rolling(date).max().fillna(high.expanding().max())
    data['rsv'] = ((data['close'] - data['lown'])
                   / (data['highn'] - data['lown']) * 100)
    data['kdj_k'] = data['rsv'].ewm(com=m1).mean()
    data['kdj_d'] = data['kdj_k'].ewm(com=m2).mean()
    data['kdj_j'] = 3 * data['kdj_k'] - 2 * data['kdj_d']
def analysis_kdjv2(code):
    """Back-test a KDJ crossover strategy for one stock and write CSV reports.

    Reads ``./stockdata/data/last3year/<code>.csv`` (columns ``date``,
    ``high``, ``low``, ``close``, ``p_change``), computes KDJ with ``com=3``
    smoothing, sets ``Signal`` to 1 on a K-above-D crossover and 0 on a
    K-below-D crossover, and holds the previous day's signal as ``position``.

    Writes ``kdjv2_year.csv`` (yearly compounded returns of the stock and of
    the strategy), ``kdjv2.csv`` (full frame) and ``kdjv2_Signal.csv`` (last
    20 rows) under ``./output/A/<code>/``.  Returns ``None``.
    """
    filename = './stockdata/data/last3year/' + code + '.csv'
    stock_dataT = pd.read_csv(filename, parse_dates=['date'])
    # A list selects columns; a tuple can be read as a MultiIndex key.
    # Copy so later column assignments do not target a view.
    stock_data = stock_dataT.loc[
        :, ['date', 'high', 'low', 'close', 'p_change']].copy()

    # KDJ: 9-period extremes, warm-up filled with the running extreme.
    stock_data['low_list'] = stock_data['low'].rolling(9).min().fillna(
        stock_data['low'].expanding().min())
    stock_data['high_list'] = stock_data['high'].rolling(9).max().fillna(
        stock_data['high'].expanding().max())
    stock_data['rsv'] = ((stock_data['close'] - stock_data['low_list'])
                         / (stock_data['high_list'] - stock_data['low_list'])
                         * 100)
    stock_data['KDJ_K'] = stock_data['rsv'].ewm(com=3).mean()
    stock_data['KDJ_D'] = stock_data['KDJ_K'].ewm(com=3).mean()
    stock_data['KDJ_J'] = 3 * stock_data['KDJ_K'] - 2 * stock_data['KDJ_D']

    # Golden cross -> 1, dead cross -> 0.  Create the column first so it
    # exists even when the history contains no crossover at all.
    k_line = stock_data['KDJ_K']
    d_line = stock_data['KDJ_D']
    stock_data['Signal'] = float('nan')
    buy = (k_line > d_line) & (k_line.shift(1) < d_line.shift(1))
    sell = (k_line < d_line) & (k_line.shift(1) > d_line.shift(1))
    stock_data.loc[buy, 'Signal'] = 1
    stock_data.loc[sell, 'Signal'] = 0

    # Position follows the previous day's signal and is held until it changes.
    stock_data['position'] = stock_data['Signal'].shift(1).ffill()

    # Compounded strategy value, starting from 1.
    stock_data['Cash_index'] = (
        (stock_data['p_change'] / 100) * stock_data['position'] + 1.0
    ).cumprod()

    print('The KDJ Backwards methon Signal:')
    Make_decision(stock_data)

    # Percentage close-to-close return over the next n trading days.
    for n in [1, 2, 3, 5, 10, 20]:
        stock_data['CP_next_' + str(n) + '_days'] = (
            stock_data['close'].shift(-1 * n) / stock_data['close'] - 1.0
        ) * 100

    # Statistics at the buy points.
    dd = stock_data[stock_data['Signal'] == 1]
    print_return_next_n_day(dd)

    codedir = './output/A/' + code + os.sep
    if not os.path.exists(codedir):
        # makedirs also creates missing parent directories.
        os.makedirs(codedir)

    # Yearly compounded return of the stock versus the strategy.
    stock_data['p_change_KDJV2'] = (
        stock_data['p_change'] * stock_data['position'])
    # NOTE(review): 'A' is the legacy year-end alias; newer pandas spells it
    # 'YE' - confirm which pandas version this runs against.
    year_rtn = stock_data.set_index('date')[
        ['p_change', 'p_change_KDJV2']
    ].resample('A').agg(lambda x: (x / 100 + 1.0).prod() - 1.0) * 100
    year_rtn.to_csv(codedir + 'kdjv2_year.csv', encoding='gbk')

    stock_data.to_csv(codedir + 'kdjv2.csv', encoding='gbk', index=False)
    stock_data.tail(20).to_csv(
        codedir + 'kdjv2_Signal.csv', encoding='gbk', index=False)
    print('the share %s trading sign for KDJV2:' % code)
    print(stock_data.tail(5))
    return
def kdj(stock):
    """Return the ``(k, d, j)`` series of the 9-period KDJ indicator.

    ``stock`` must expose ``high``, ``low`` and ``close`` Series as
    attributes.  K and D use exponential smoothing with ``com=2``.

    NOTE(review): J is built from ``k`` with its first two rows sliced off, so
    after index alignment the first two J values are NaN.  That behaviour is
    preserved; confirm whether it is intentional.
    """
    low_list = stock.low.rolling(9).min()
    low_list = low_list.fillna(stock.low.expanding().min())
    high_list = stock.high.rolling(9).max()
    high_list = high_list.fillna(stock.high.expanding().max())
    rsv = (stock.close - low_list) / (high_list - low_list) * 100

    k = rsv.ewm(com=2).mean()
    d = k.ewm(com=2).mean()
    j = 3 * k.iloc[2:] - 2 * d
    return k, d, j
def kdj(stock):
    """Return the ``(k, d, j)`` series of the 9-period KDJ indicator.

    ``stock`` must expose ``high``, ``low`` and ``close`` Series as
    attributes.  K and D use exponential smoothing with ``com=2``.

    NOTE(review): J is built from ``k`` with its first two rows sliced off, so
    after index alignment the first two J values are NaN.  That behaviour is
    preserved; confirm whether it is intentional.
    """
    lows = stock.low
    highs = stock.high

    # Rolling 9-period extremes; the NaN warm-up is filled with the running
    # extreme seen so far.
    low_list = lows.rolling(9).min().fillna(lows.expanding().min())
    high_list = highs.rolling(9).max().fillna(highs.expanding().max())
    rsv = (stock.close - low_list) / (high_list - low_list) * 100

    k = rsv.ewm(com=2).mean()
    d = k.ewm(com=2).mean()
    j = 3 * k.iloc[2:] - 2 * d
    return k, d, j
def get_kdj(stock_data):
    """Add rounded ``kdj_k``, ``kdj_d`` and ``kdj_j`` columns to ``stock_data``.

    ``stock_data`` needs ``high``, ``low`` and ``close`` columns.  Each line
    is rounded to 2 decimals; D is smoothed from the already-rounded K, as in
    the original.  The frame is modified in place and also returned.
    """
    low_list = stock_data['low'].rolling(9).min()
    low_list = low_list.fillna(stock_data['low'].expanding().min())
    high_list = stock_data['high'].rolling(9).max()
    high_list = high_list.fillna(stock_data['high'].expanding().max())
    rsv = (stock_data['close'] - low_list) / (high_list - low_list) * 100

    # Series.round is used instead of the builtin round(), which depends on
    # the interpreter for how it handles a Series.
    stock_data['kdj_k'] = rsv.ewm(com=2).mean().round(2)
    stock_data['kdj_d'] = stock_data['kdj_k'].ewm(com=2).mean().round(2)
    stock_data['kdj_j'] = (
        3 * stock_data['kdj_k'] - 2 * stock_data['kdj_d']).round(2)
    return stock_data
def calKDJ(data, N=0, M=0):
    """Compute the KDJ indicator and return all intermediate series.

    :param data: DataFrame with ``high``, ``low`` and ``close`` columns.
    :param N: look-back window; 0 selects the default of 9.
    :param M: ``com`` of the exponential smoothing; 0 selects the default 2.
    :return: ``(low_list, high_list, rsv, KDJ_K, KDJ_D, KDJ_J)``.
    """
    if N == 0:
        N = 9
    if M == 0:
        M = 2

    low_list = data['low'].rolling(N).min()
    low_list = low_list.fillna(data['low'].expanding().min())
    high_list = data['high'].rolling(N).max()
    high_list = high_list.fillna(data['high'].expanding().max())
    rsv = (data['close'] - low_list) / (high_list - low_list) * 100

    KDJ_K = rsv.ewm(com=M).mean()
    KDJ_D = KDJ_K.ewm(com=M).mean()
    KDJ_J = 3 * KDJ_K - 2 * KDJ_D
    return low_list, high_list, rsv, KDJ_K, KDJ_D, KDJ_J
def analyse(self):
    """Evaluate the strategy's trades and store summary statistics.

    Loads the trades selected by ``self.analyseStrategySql`` from the pySwing
    SQLite database into ``self._strategyData`` (indexed by ``Date``),
    subtracts a fixed cost of 0.2 from each ``ExitValue``, derives the mean,
    median, sum, count, Sharpe ratio and maximum draw-down of the
    after-cost values, logs them, and replaces the strategy's row using
    ``self.deleteStrategySql`` / ``self.insertStrategySql``.
    """
    query = self.analyseStrategySql % (
        self._rule1, self._rule2, self._rule3, self._exit, self._type)

    connection = sqlite3.connect(pyswing.database.pySwingDatabase)
    try:
        self._strategyData = read_sql_query(query, connection, 'Date')
    finally:
        # Close even if the query fails, so the handle does not leak.
        connection.close()

    self._strategyData['ExitValueAfterCosts'] = (
        self._strategyData['ExitValue'] - 0.2)
    exitValues = self._strategyData['ExitValueAfterCosts']

    mean = exitValues.mean()
    median = exitValues.median()
    total = exitValues.sum()  # renamed from ``sum`` to stop shadowing the builtin
    count = exitValues.count()

    # NOTE(review): the divisor 10 presumably reflects ten years of data -
    # confirm against the data set.
    tradesPerYear = count / 10
    sharpeRatio = sqrt(tradesPerYear) * exitValues.mean() / exitValues.std()

    # Series.expanding() replaces the removed expanding_sum/max/min helpers.
    runningSum = exitValues.expanding().sum()
    max2here = runningSum.expanding().max()
    self._strategyData["Sum"] = runningSum
    self._strategyData["Max"] = max2here
    self._strategyData["Min"] = runningSum.expanding().min()
    self._strategyData["DD"] = (
        self._strategyData["Max"] - self._strategyData["Min"])

    # Draw-down: the largest fall of the running sum below its running peak.
    dd2here = runningSum - max2here
    drawDown = dd2here.min()

    Logger.log(logging.INFO, "Analysing Strategy", {"scope":__name__, "Rule 1":self._rule1, "Rule 2":self._rule2, "Rule 3":self._rule3, "Exit":self._exit, "Type":self._type, "Mean":str(mean), "Median":str(median), "Sum":str(total), "Count":str(count), "SharpeRatio":str(sharpeRatio), "DrawDown":str(drawDown)})

    # NOTE(review): both statements are built with % string formatting; if the
    # rule names can ever come from outside the program, switch to bound
    # parameters.
    deleteSql = self.deleteStrategySql % (
        pyswing.globals.pySwingStrategy, self._rule1, self._rule2,
        self._rule3, self._exit, self._type)
    insertSql = self.insertStrategySql % (
        pyswing.globals.pySwingStrategy, self._rule1, self._rule2,
        self._rule3, self._exit, self._type, str(mean), str(median),
        str(total), str(count), str(sharpeRatio), str(drawDown))

    connection = sqlite3.connect(pyswing.database.pySwingDatabase)
    try:
        c = connection.cursor()
        c.executescript(deleteSql)
        connection.commit()
        c.executescript(insertSql)
        connection.commit()
        c.close()
    finally:
        connection.close()
def get_kdj_history(code):
    """Check the KDJ setup five bars ago and measure the high reached since.

    KDJ (9-period, ``com=2``, rounded to 2 decimals) is computed over the
    history returned by ``ts.get_k_data(code)``.  The setup is evaluated on
    the fifth-from-last row: J above the previous row's J, and K below 20.

    * Fewer than 6 rows: returns the last row with ``kdj_k`` and ``kdj_ok``
      set to 0.
    * Setup met: returns the fifth-from-last row with ``kdj_ok`` = 1 plus
      ``highPercent`` (percentage from that row's close to the highest
      ``high`` of the last five rows), ``highDate`` (date of that high) and
      ``highDays`` (its 1-based position within the last five rows).
    * Otherwise: returns the last row with ``kdj_ok`` = 0.
    """
    stock_data = ts.get_k_data(code)

    low_list = stock_data['low'].rolling(9).min()
    low_list = low_list.fillna(stock_data['low'].expanding().min())
    high_list = stock_data['high'].rolling(9).max()
    high_list = high_list.fillna(stock_data['high'].expanding().max())
    rsv = (stock_data['close'] - low_list) / (high_list - low_list) * 100

    stock_data['kdj_k'] = rsv.ewm(com=2).mean().round(2)
    stock_data['kdj_d'] = stock_data['kdj_k'].ewm(com=2).mean().round(2)
    stock_data['kdj_j'] = (
        3 * stock_data['kdj_k'] - 2 * stock_data['kdj_d']).round(2)

    kdj_j = stock_data['kdj_j']
    if kdj_j.size < 6:
        # Copy before assigning so the write does not land on a slice.
        stock_data = stock_data.tail(1).copy()
        stock_data['kdj_k'] = 0
        stock_data['kdj_ok'] = 0
        return stock_data

    # Positional access; the original used "size - n" as a label, which only
    # works when the index is exactly 0..n-1.
    yesterdayJ = kdj_j.iloc[-6]
    todayJ = kdj_j.iloc[-5]
    todayK = stock_data['kdj_k'].iloc[-5]

    if todayJ > yesterdayJ and todayK < 20.0:
        last_five = stock_data.tail(5).copy()
        last_five['indexNum'] = [1, 2, 3, 4, 5]
        # sort_values replaces the removed DataFrame.sort(columns=...).
        top = last_five.sort_values(by='high').tail(1)

        entry = stock_data.tail(5).head(1).copy()
        entry['kdj_ok'] = 1
        highPercent = top['high'].iloc[0] / entry['close'].iloc[0]
        entry['highPercent'] = (round(highPercent, 3) * 100) - 100
        entry['highDate'] = top['date'].iloc[0]
        entry['highDays'] = top['indexNum'].iloc[0]
        return entry

    stock_data = stock_data.tail(1).copy()
    stock_data['kdj_ok'] = 0
    return stock_data
def expanding_smoother(self, data, stype='rolling_mean', min_periods=None, freq=None):
    """Apply an expanding-window statistic to ``data``.

    See the pandas window-function documentation for the statistics.

    :param data: pandas Series or DataFrame.
    :param stype: statistic name: ``count``, ``sum``, ``mean``, ``median``,
        ``min``, ``max``, ``std``, ``var``, ``skew`` or ``kurt``.  An
        ``expanding_`` or ``rolling_`` prefix is accepted and ignored, so the
        default ``'rolling_mean'`` selects the mean (previously the default
        matched no branch and raised UnboundLocalError).
    :param min_periods: minimum observations required for a value;
        ``None`` means 1.
    :param freq: optional frequency to conform the data to first.
    :return: object of the same type as ``data`` holding the statistic.
    :raises ValueError: if ``stype`` is not a supported statistic.
    """
    kind = stype
    for prefix in ('expanding_', 'rolling_'):
        if kind.startswith(prefix):
            kind = kind[len(prefix):]
            break

    supported = ('count', 'sum', 'mean', 'median', 'min', 'max',
                 'std', 'var', 'skew', 'kurt')
    if kind not in supported:
        raise ValueError('unknown smoothing type %r; expected one of %s'
                         % (stype, ', '.join(supported)))

    if freq is not None:
        # NOTE(review): the removed pd.expanding_* helpers resampled to
        # ``freq`` before computing; this assumes min/max/median aggregated
        # with the same statistic and the others used asfreq - confirm
        # against the pandas version this code was written for.
        how = kind if kind in ('min', 'max', 'median') else 'asfreq'
        data = getattr(data.resample(freq), how)()

    window = data.expanding(
        min_periods=1 if min_periods is None else min_periods)
    return getattr(window, kind)()
def lm_kdj(df, n, ksgn='close'):
    """Add KDJ columns ``k``, ``d`` and ``j`` to ``df``.

    :param df: DataFrame with ``high`` and ``low`` columns plus the price
        column named by ``ksgn``.
    :param n: look-back window for the rolling high/low.
    :param ksgn: name of the price column, normally ``'close'``.
    :return: ``df`` itself, modified in place.
    """
    lowList = df['low'].rolling(n).min()
    lowList = lowList.fillna(df['low'].expanding().min())
    highList = df['high'].rolling(n).max()
    highList = highList.fillna(df['high'].expanding().max())
    rsv = (df[ksgn] - lowList) / (highList - lowList) * 100

    df['k'] = rsv.ewm(com=2).mean()
    df['d'] = df['k'].ewm(com=2).mean()
    df['j'] = 3.0 * df['k'] - 2.0 * df['d']
    return df
def computeRunningMinMaxSignals(self, df, start_dt, spike_ticks, spike_5s_pred):
    """Record the extreme 200ms-EMA microprice since just before the event.

    The window runs from the row timestamped exactly 2.5 seconds before
    ``start_dt`` up to and including row ``self.cur_loc``.  If the average of
    ``spike_ticks`` and ``spike_5s_pred`` is non-negative the window maximum
    is used, otherwise the window minimum; the value is converted with
    ``self.priceTicks`` and stored in ``max_disl_ema_200ms_ticks`` at row
    ``self.cur_loc``.

    Returns ``df`` (modified in place).  ``df.index.get_loc`` raises if there
    is no row at the pre-event timestamp.
    """
    pre_event_snapshot_dt = start_dt - timedelta(seconds=2.5)
    pre_event_snapshot_loc = df.index.get_loc(pre_event_snapshot_dt)

    # The last value of an expanding max/min over the window is just the
    # window's max/min, so take it directly.
    window = df['microprice_ema_200ms'].iloc[
        pre_event_snapshot_loc:self.cur_loc + 1]
    max_to_here_ticks = self.priceTicks(window.max())
    min_to_here_ticks = self.priceTicks(window.min())

    if (spike_ticks + spike_5s_pred) / 2.0 >= 0:
        max_disl = max_to_here_ticks
    else:
        max_disl = min_to_here_ticks

    # Positional write: create the column on first use, then set the cell at
    # row ``cur_loc``.
    column = 'max_disl_ema_200ms_ticks'
    if column not in df.columns:
        df[column] = float('nan')
    df.iloc[self.cur_loc, df.columns.get_loc(column)] = max_disl
    return df
def indicator_KDJ(stock_data):
    """Compute KDJ, mark crossovers and return the golden-cross rows.

    ``stock_data`` needs ``high``, ``low`` and ``close`` columns and is
    modified in place: the KDJ columns, the crossover column and the forward
    return columns are added, and rows containing any NaN are dropped.

    Returns the rows whose crossover column equals ``'金叉'`` (K crossing
    above D), or ``None`` when there are none.
    """
    lows = stock_data['low']
    highs = stock_data['high']

    # 9-period rolling extremes; the first 8 values are NaN and are filled
    # with the running extreme seen so far.
    low_list = lows.rolling(9).min().fillna(lows.expanding().min())
    high_list = highs.rolling(9).max().fillna(highs.expanding().max())
    rsv = (stock_data['close'] - low_list) / (high_list - low_list) * 100

    stock_data['KDJ_K'] = rsv.ewm(com=2, adjust=False).mean()
    stock_data['KDJ_D'] = stock_data['KDJ_K'].ewm(com=2, adjust=False).mean()
    stock_data['KDJ_J'] = 3 * stock_data['KDJ_K'] - 2 * stock_data['KDJ_D']

    # Crossovers: compare today's K > D state with yesterday's.
    stock_data['KDJ_金叉死叉'] = ''
    kdj_position = stock_data['KDJ_K'] > stock_data['KDJ_D']
    previous = kdj_position.shift()
    # The first shifted value is NaN, so compare explicitly instead of negating.
    stock_data.loc[kdj_position & previous.eq(False), 'KDJ_金叉死叉'] = '金叉'
    stock_data.loc[~kdj_position & previous.eq(True), 'KDJ_金叉死叉'] = '死叉'

    # Forward returns over the next n trading days, from the close price.
    for n in [1, 2, 3, 5, 10, 20]:
        stock_data['接下来' + str(n) + '个交易日涨跌幅'] = (
            stock_data['close'].shift(-1 * n) / stock_data['close'] - 1.0)
    stock_data.dropna(how='any', inplace=True)

    stock_data = stock_data[stock_data['KDJ_金叉死叉'] == '金叉']
    if stock_data.empty:
        return None
    return stock_data
def plot_Ndays_Break(self, stock_df):
    """Find N-day channel breakouts in ``stock_df``.

    Adds two columns to ``stock_df`` in place:

    * ``N1_High`` - highest ``High`` of the previous 42 rows (today excluded);
    * ``N2_Low`` - lowest ``Low`` of the previous 30 rows (today excluded).

    Rows without a full look-back are filled with the running maximum /
    minimum of ``Close``, which includes today's close, so no breakout can
    fire there.

    :param stock_df: DataFrame with ``High``, ``Low`` and ``Close`` columns;
        the index entries must support ``strftime``.
    :return: ``(break_pd, dispCont_List)`` - the breakout rows and one
        message per breakout (direction, date, close price).
    """
    N1 = 42
    N2 = 30

    n1_high = stock_df.High.rolling(window=N1).max().shift(1)
    stock_df['N1_High'] = n1_high.fillna(stock_df.Close.expanding().max())
    n2_low = stock_df.Low.rolling(window=N2).min().shift(1)
    stock_df['N2_Low'] = n2_low.fillna(stock_df.Close.expanding().min())

    dispCont_List = []
    break_rows = []
    for kl_index in range(stock_df.shape[0]):
        today = stock_df.iloc[kl_index]

        # Close below the N2-day low: downward breakout (sell).
        if today['Close'] < today['N2_Low']:
            break_rows.append(today)
            dispCont_List.append(
                "向下突破:" + stock_df.index[kl_index].strftime('%Y-%m-%d')
                + ',' + str(today['Close']) + '\n')

        # Close above the N1-day high: upward breakout (buy).
        if today['Close'] > today['N1_High']:
            break_rows.append(today)
            dispCont_List.append(
                "向上突破:" + stock_df.index[kl_index].strftime('%Y-%m-%d')
                + ',' + str(today['Close']) + '\n')

    # Build the frame once; DataFrame.append is gone from current pandas and
    # was quadratic when called per row.
    break_pd = pd.DataFrame(break_rows) if break_rows else pd.DataFrame()
    return break_pd, dispCont_List
# ========== Turtle-trading buy and sell points
# Two parameters: buy when the close exceeds the highest high of the last N1
# days, sell when the close falls below the lowest low of the last N2 days.
# Both can be tuned, but normally N1 > N2.
N1 = 20
N2 = 10

# Highest high of the last N1 trading days; while fewer than N1 days exist,
# use the highest high seen so far.
index_data['最近N1个交易日的最高点'] = index_data['high'].rolling(N1).max().fillna(
    index_data['high'].expanding().max())

# Lowest low of the last N2 trading days, built the same way.  The original
# passed N1 as the window here, so the exit channel was as long as the entry
# channel and N2 was never used.
index_data['最近N2个交易日的最低点'] = index_data['low'].rolling(N2).min().fillna(
    index_data['low'].expanding().min())

# Today's close > yesterday's N1-day high: closing signal = 1 (buy).
buy_index = index_data[index_data['close'] > index_data['最近N1个交易日的最高点'].shift(1)].index
index_data.loc[buy_index, '收盘发出的信号'] = 1

# Today's close < yesterday's N2-day low: closing signal = 0 (sell).
sell_index = index_data[index_data['close'] < index_data['最近N2个交易日的最低点'].shift(1)].index
index_data.loc[sell_index, '收盘发出的信号'] = 0

# Daily position: 1 while holding the index, 0 otherwise.  It follows the
# previous day's signal and is carried forward until the next signal.
index_data['当天的仓位'] = index_data['收盘发出的信号'].shift(1).ffill()

# Keep data from 1993-01-01 onwards, dropping the earlier history.
index_data = index_data[index_data['date'] >= pd.to_datetime('19930101')]
data1['fast_line'] = pd.rolling_mean(data1['close'], h) data1['slow_line'] = pd.rolling_mean(data1['close'], k) data1['fast_line'] = data1['fast_line'].fillna( value=pd.expanding_mean(data1['close'])) data1['slow_line'] = data1['slow_line'].fillna( value=pd.expanding_mean(data1['close'])) data1['dist_%s_%s' % (k, h)] = data1['fast_line'] - data1['slow_line'] for h in range(10, 26, 5): data1['fast_line'] = '' data1['slow_line'] = '' data1['fast_line'] = pd.rolling_max(data1['high'].shift(1), h) data1['slow_line'] = pd.rolling_min(data1['low'].shift(1), h) data1['fast_line'] = data1['fast_line'].fillna( value=pd.expanding_max(data1['high'])) data1['slow_line'] = data1['slow_line'].fillna( value=pd.expanding_min(data1['low'])) data1['dist_high_%s' % h] = data1['high'] - data1['fast_line'] data1['dist_low_%s' % h] = data1['low'] - data1['slow_line'] data1 = MACD(data1, 12, 26, 9) data2 = pd.read_csv('rb888_2017.csv', parse_dates=True, index_col='time') data2.reset_index(inplace=True) data2['log_return'] = np.log(data2['close'] / data2['close'].shift(1)) data2['log_return'] = data2['log_return'].fillna(0) for h, k in [(5, 20), (15, 20), (5, 10), (5, 15), (10, 15)]: data2['fast_line'] = '' data2['slow_line'] = '' data2['fast_line'] = pd.rolling_mean(data2['close'], h) data2['slow_line'] = pd.rolling_mean(data2['close'], k) data2['fast_line'] = data2['fast_line'].fillna( value=pd.expanding_mean(data2['close'])) data2['slow_line'] = data2['slow_line'].fillna(
这两个参数可以自行调整大小,但是一般N1 > N2 ''' N1 = 20 N2 = 10 ''' 通过rolling_max方法计算最近N1个交易日的最高价, 最近N1个交易日的最高点: MaxIn_N1 对于上市不足N1天的数据,取上市至今的最高价 ''' index_data['MaxIn_N1'] = pandas.rolling_max(index_data['High'], N1) index_data['MaxIn_N1'].fillna(value=pandas.expanding_max(index_data['High']), inplace=True) ''' 通过rolling_min方法计算最近N2个交易日的最低价, MinIn_N2:MinIn_N2 ''' index_data['MinIn_N2'] = pandas.rolling_min(index_data['Low'], N1) index_data['MinIn_N2'].fillna(value=pandas.expanding_min(index_data['Low']), inplace=True) # 当当天的【close】> 昨天的【MaxIn_N1】时,将【收盘发出的信号】设定为1 buy_index = index_data[index_data['Close'] > index_data['MaxIn_N1'].shift(1)].index index_data.loc[buy_index, '收盘发出的信号'] = 1 # 当当天的【close】< 昨天的【MinIn_N2】时,将【收盘发出的信号】设定为0 sell_index = index_data[index_data['Close'] < index_data['MinIn_N2'].shift(1)].index index_data.loc[sell_index, '收盘发出的信号'] = 0 ''' The following sources are used for the test of the above function # 计算每天的仓位,当天持有上证指数时,仓位为1,当天不持有上证指数时,仓位为0 index_data['当天的仓位'] = index_data['收盘发出的信号'].shift(1) index_data['当天的仓位'].fillna(method='ffill', inplace=True)
data['fast_line']=pd.rolling_mean(data['open'],h) data['slow_line']=pd.rolling_mean(data['open'],k) data['fast_line']=data['fast_line'].fillna(value=pd.expanding_mean(data['open'])) data['slow_line']=data['slow_line'].fillna(value=pd.expanding_mean(data['open'])) data['dist_%s_%s'%(k,h)]=data['fast_line']-data['slow_line'] for i in range(5,31,5): data['MA_%s'%i]=pd.rolling_mean(data['open'],i) data['MA_%s'%i]=data['MA_%s'%i].fillna(0)-data['open'] data=MACD(data,12,26,9) for h in range(10,26,5): data['fast_line']='' data['slow_line']='' data['fast_line']=pd.rolling_max(data['high'].shift(1),h) data['slow_line']=pd.rolling_min(data['low'].shift(1),h) data['fast_line']=data['fast_line'].fillna(value=pd.expanding_max(data['high'])) data['slow_line']=data['slow_line'].fillna(value=pd.expanding_min(data['low'])) data['dist_high_%s'%h]=data['high']-data['fast_line'] data['dist_low_%s'%h]=data['low']-data['slow_line'] #引入隐马尔科夫模型 factor_list=['close','volume','dist_10_5','dist_15_5','dist_20_5','dist_15_10','dist_20_10','dist_20_15','dist_30_15','log_return','log_return_5','MACD','dist_high_10','dist_high_15','dist_high_20','dist_high_25','dist_low_10','dist_low_15','dist_low_20','dist_low_25','MA_5','MA_10','MA_15','MA_20','MA_25','MA_30'] for i in factor_list: X = np.column_stack([data[i]]) model = GaussianHMM(n_components=3, covariance_type="diag", n_iter=1000,random_state=0).fit(X) hidden_states = model.predict(X) plt.figure(figsize=(15, 8)) for k in range(model.n_components): idx = (hidden_states==k) plt.plot_date(data['time'][idx],data['close'][idx],'.',label='%dth hidden state'%k,lw=1) plt.legend() plt.grid(1) plt.savefig('C:/Users/Public/Documents/Python Scripts/隐马尔科夫状态刻画图集开盘价(2015)/%s.png'%(i))
# Collect the stock codes from the CSV file names in the data directory.
for root, dirs, files in os.walk('all_trading_data/stock data'):
    for f in files:
        if '.csv' in f:
            stock_code_list.append(f.split('.csv')[0])

all_stock = pd.DataFrame()

for code in stock_code_list:
    print(code)
    stock_data = pd.read_csv('all_trading_data/stock data/' + code + '.csv',
                             parse_dates=[1])
    # sort_values replaces the removed DataFrame.sort.
    stock_data.sort_values('date', inplace=True)

    # KDJ: 9-period rolling extremes, with the NaN warm-up filled from the
    # running extreme seen so far.
    low_list = stock_data['low'].rolling(9).min()
    low_list = low_list.fillna(stock_data['low'].expanding().min())
    high_list = stock_data['high'].rolling(9).max()
    high_list = high_list.fillna(stock_data['high'].expanding().max())
    rsv = (stock_data['close'] - low_list) / (high_list - low_list) * 100

    stock_data['KDJ_K'] = rsv.ewm(com=2).mean()
    stock_data['KDJ_D'] = stock_data['KDJ_K'].ewm(com=2).mean()
    stock_data['KDJ_J'] = 3 * stock_data['KDJ_K'] - 2 * stock_data['KDJ_D']

    # Golden cross: K was not above D on the previous row and is today.
    # Dead cross: the reverse.  The original called ``.shitf()`` here, a typo
    # for ``.shift()`` that raised AttributeError.
    stock_data['KDJ_金叉死叉'] = ''
    kdj_position = stock_data['KDJ_K'] > stock_data['KDJ_D']
    stock_data.loc[kdj_position[(kdj_position == True) & (kdj_position.shift() == False)].index, 'KDJ_金叉死叉'] = '金叉'
    stock_data.loc[kdj_position[(kdj_position == False) & (kdj_position.shift() == True)].index, 'KDJ_金叉死叉'] = '死叉'
def LLV(self, param):
    """Lowest value over a look-back window.

    :param param: sequence ``(values, window)``.  A ``window`` of 0 means
        "since the start" (expanding minimum); otherwise a rolling minimum
        over ``window`` rows, NaN until the window is full.
    :return: the minima, as a pandas object when ``values`` is one and as a
        NumPy array otherwise.
    """
    values, window = param[0], param[1]

    # The removed pd.rolling_min / pd.expanding_min also accepted plain
    # arrays; keep that working by wrapping and unwrapping them.
    is_pandas = isinstance(values, (pd.Series, pd.DataFrame))
    series = values if is_pandas else pd.Series(values)

    if window == 0:
        lowest = series.expanding().min()
    else:
        lowest = series.rolling(window).min()
    return lowest if is_pandas else lowest.values
def longsklearn(code='999999', ptype='f', dtype='d', start=None, end=None):
    """Plot a stock's close price with moving-average and extreme overlays.

    Loads the price history through ``tdd.get_tdx_append_now_df`` and a
    re-sampled copy through ``tdd.get_tdx_stock_period_to_type(df, dtype)``,
    prints diagnostics about the local highs/lows and their ``LIS``
    subsequences, then draws:

    * the close price;
    * the 60-row mean of the high/low mid-price (green);
    * on the re-sampled series: the 5-row mean (red line), 5-row min/max
      (blue/yellow dots) and the expanding max/min with at least 5
      observations (red/green dots).

    Zoom and pan handlers from ``zoompan`` are attached and the figure is
    shown.  Returns ``None``.
    """
    df = tdd.get_tdx_append_now_df(
        code, ptype, start, end).sort_index(ascending=True)
    dw = tdd.get_tdx_stock_period_to_type(
        df, dtype).sort_index(ascending=True)

    h = df.loc[:, ['open', 'close', 'high', 'low']]
    highp = h['high'].values
    lowp = h['low'].values
    closep = h['close'].values

    # Linear trend of the close price, extended 200 points past the data.
    # The original followed this with a bare
    # ``LinearRegression(..., normalize=False)`` expression whose result was
    # discarded; it is dropped because current scikit-learn rejects that
    # keyword.
    lr = LinearRegression()
    x = np.atleast_2d(np.linspace(0, len(closep), len(closep))).T
    lr.fit(x, closep)
    xt = np.atleast_2d(
        np.linspace(0, len(closep) + 200, len(closep) + 200)).T
    yt = lr.predict(xt)  # only used by the currently disabled trend overlay

    # Local lows (bV at positions bP) and local highs (uV at positions uP).
    bV = []
    bP = []
    uV = []
    uP = []
    for i in range(1, len(highp) - 1):
        if lowp[i] <= lowp[i - 1] and lowp[i] < lowp[i + 1]:
            bV.append(lowp[i])
            bP.append(i)
    for i in range(1, len(highp) - 1):
        if highp[i] >= highp[i - 1] and highp[i] > highp[i + 1]:
            uV.append(highp[i])
            uP.append(i)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(highp)
    print("uV:%s" % uV[:1])
    print("uP:%s" % uP[:1])
    print("bV:%s" % bV[:1])
    print("bP:%s" % bP[:1])

    sV, sP = LIS(uV)
    dV, dP = LIS(bV)
    print("sV:%s" % sV[:1])
    print("sP:%s" % sP[:1])
    print("dV:%s" % dV[:1])
    print("dP:%s" % dP[:1])

    # Map the subsequence positions back to row positions in the price data.
    sidx = [uP[pos] for pos in sP]
    didx = [bP[pos] for pos in dP]
    print("sidx:%s" % sidx[:1])
    print("didx:%s" % didx[:1])

    # Trend through the selected highs (also only used by disabled overlays).
    lr = LinearRegression()
    X = np.atleast_2d(np.array(sidx)).T
    Y = np.array(sV)
    lr.fit(X, Y)
    estV = lr.predict(xt)

    fig = plt.figure(figsize=(16, 10), dpi=72)
    plt.subplots_adjust(left=0.05, bottom=0.08, right=0.95, top=0.95,
                        wspace=0.15, hspace=0.25)
    ax = fig.add_subplot(111)
    plt.grid(True)
    ax = h['close'].plot()

    # Mid-price per row; vectorised so it no longer depends on ``map``
    # returning a list.
    df['mean'] = (df['high'] + df['low']) / 2
    print(df['mean'][:1])

    dw = dw.set_index('date')
    ax.plot(df.index, df['mean'].rolling(60).mean(), 'g', linewidth=1)
    ax.plot(dw.index, dw.close.rolling(5).mean(), 'r', linewidth=1)
    ax.plot(dw.index, dw.close.rolling(5).min(), 'bo')
    ax.plot(dw.index, dw.close.rolling(5).max(), 'yo')
    # The second positional argument of the removed pd.expanding_max/min was
    # min_periods.
    ax.plot(dw.index, dw.close.expanding(min_periods=5).max(), 'ro')
    ax.plot(dw.index, dw.close.expanding(min_periods=5).min(), 'go')

    scale = 1.1
    zp = zoompan.ZoomPan()
    # Hold the returned handlers in locals while the figure is shown.
    figZoom = zp.zoom_factory(ax, base_scale=scale)
    figPan = zp.pan_factory(ax)
    show()
def comput_idicators(df, trading_days, required, save_file, save_address, whole=1):
    """Compute expanding performance indicators, save them and plot them.

    Args:
        df: DataFrame with at least the columns ``index_price``,
            ``Interest_rate``, ``nav``, ``rebalancing`` and ``stoploss``.
        trading_days: Number of periods per year used for annualising.
        required: Required return; it is divided by ``trading_days`` to get
            the per-period target of the Sortino ratio.
        save_file: Label inserted into the plot titles.
        save_address: Destination passed to ``DataFrame.to_csv``.
        whole: When equal to 1 the maximum-drawdown columns are added and
            the figure is drawn.

    Returns:
        The indicator DataFrame, restricted to the rows from the first
        ``rebalancing == 1`` row onwards.

    Raises:
        IndexError: If no row has ``rebalancing == 1``.
    """
    # TODO: net_value has some problem.
    col = ['index_price', 'Interest_rate', 'nav', 'rebalancing', 'stoploss']
    start_balance = df.index[df['rebalancing'] == 1][0]
    # Explicit copy so the column assignments below never write into a view.
    df_valid = df.loc[df.index >= start_balance, col].copy()

    annualise = np.sqrt(trading_days)

    # Daily log return. It is computed on the full frame and aligned on the
    # index, so the first kept row still sees the nav of the row before it.
    df_valid['return'] = np.log(df['nav']) - np.log(df['nav'].shift(1))
    returns = df_valid['return']

    # Benchmark net value, normalised to 1 on the first kept row
    # (positional access; the index labels need not be integers).
    df_valid['benchmark'] = df_valid['index_price'] / df_valid['index_price'].iloc[0]
    previous_benchmark = df_valid['benchmark'].shift(1)
    df_valid['benchmark_return'] = (
        df_valid['benchmark'] - previous_benchmark) / previous_benchmark

    # Annualised return and volatility over the expanding window.
    df_valid['Annu_return'] = returns.expanding().mean() * trading_days
    df_valid['algo_volatility'] = returns.expanding().std() * annualise
    # Interest_rate is divided by trading_days and by 100 before being
    # subtracted, i.e. it is used as an annual percentage.
    df_valid['xret'] = returns - df_valid['Interest_rate'] / trading_days / 100
    df_valid['ex_return'] = returns - df_valid['benchmark_return']

    # The callbacks below work on both ndarray and Series windows, so they
    # do not depend on the ``raw`` default of ``Expanding.apply``.
    def ratio(x):
        return np.nanmean(x) / np.nanstd(x)

    # Sharpe ratio and information ratio.
    df_valid['sharpe'] = df_valid['xret'].expanding().apply(ratio) * annualise
    df_valid['IR'] = df_valid['ex_return'].expanding().apply(ratio) * annualise

    # Sortino ratio: mean excess over the target divided by the root mean
    # squared shortfall of the observations below the target.
    target = required / trading_days

    def downside_ratio(x):
        below = x[x < target]
        downside_var = np.nansum(np.square(below - target)) / below.size
        return (np.nanmean(x) - target) / np.sqrt(downside_var)

    df_valid['sortino'] = returns.expanding().apply(downside_ratio) * annualise

    # Turn infinities (zero dispersion so far) into NaN.
    for name in ('sharpe', 'IR'):
        df_valid.loc[np.isinf(df_valid[name]), name] = np.nan

    # Hit rate: share of periods so far with return >= benchmark return.
    wins = np.where(returns >= df_valid['benchmark_return'], 1.0, 0.0)
    df_valid['hit_rate'] = wins.cumsum() / np.arange(1, len(wins) + 1)

    # 95% VaR and CVaR.
    df_valid['VaR'] = -returns.expanding().quantile(0.05) * annualise
    df_valid['CVaR'] = -returns.expanding().apply(
        lambda x: x[x < np.nanpercentile(x, 5)].mean()) * annualise

    if whole == 1:
        # Relative distance of nav from its running peak (0 at a new peak).
        nav = df_valid['nav']
        peak = nav.cummax()
        drawdown = (nav - peak) / peak
        df_valid['max_drawdown_ret'] = drawdown.expanding().min().abs()

        # The deepest drawdown starts at the last peak before the trough and
        # ends at the first return to the peak after it (NaN if there is no
        # such row, e.g. the nav never recovered).
        trough = drawdown[drawdown == drawdown.min()].index[0]
        before = drawdown[drawdown.index < trough]
        after = drawdown[drawdown.index > trough]
        peaks_before = before[before == 0]
        peaks_after = after[after == 0]
        drawdown_start = peaks_before.index[-1] if len(peaks_before) else np.nan
        drawdown_end = peaks_after.index[0] if len(peaks_after) else np.nan

        # Only the last row carries the two labels. Object columns hold any
        # index label type, and the positional write avoids the chained
        # assignment that may not reach the frame.
        last_row = len(df_valid) - 1
        for name, value in (('max_drawdown_start', drawdown_start),
                            ('max_drawdown_end', drawdown_end)):
            df_valid[name] = pd.Series(np.nan, index=df_valid.index, dtype=object)
            df_valid.iloc[last_row, df_valid.columns.get_loc(name)] = value

    df_valid.to_csv(save_address)

    # ===== result visualization =====
    plt.figure(1)
    # NOTE(review): the extent of this ``if`` was reconstructed from a
    # whitespace-collapsed source; confirm that plotting is meant to be
    # skipped entirely when ``whole != 1``.
    if whole == 1:
        plt.subplot(224)
        plt.plot(df_valid['nav'], label='strategy')
        plt.plot(df_valid['benchmark'], label='S&P500')
        plt.xlabel('Date')
        plt.legend(loc=0, shadow=True)
        plt.ylabel('Nav')
        plt.title('Nav of ' + save_file + ' & SP500')

        plt.subplot(221)
        plt.plot(df_valid['return'], label='strategy')
        plt.xlabel('Date')
        plt.ylabel('Daily_return')
        plt.title('Daily Return of ' + save_file)

        plt.subplot(222)
        x_return = df_valid.loc[df_valid['return'].notna(), 'return']
        y_return = df_valid.loc[
            df_valid['benchmark_return'].notna(), 'benchmark_return']
        mu = x_return.mean()
        sigma = x_return.std()
        mybins = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
        # ``density`` replaces the removed ``normed`` keyword.
        plt.hist(x_return, mybins, density=True, alpha=0.5, label='strategy')
        plt.hist(y_return, mybins, density=True, alpha=0.5, label='S&P500')
        plt.ylabel('density')
        plt.xlabel('daily_return')
        plt.title('Histogram of Daily Return for ' + save_file + ' & SP500')
        plt.grid(True)
        # Normal density with the strategy's mean and standard deviation,
        # written out because ``mlab.normpdf`` was removed from matplotlib.
        y = np.exp(-0.5 * ((mybins - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
        plt.plot(mybins, y, 'r--', linewidth=1, label='Normal of strategy')
        plt.legend(loc=0, shadow=True)
        plt.show()
    return df_valid
# Running extremes of the close. They fill the warm-up rows of the rolling
# channels below, where the window is not yet full and the result is NaN.
# Example: the expanding max of [1, 2, 1, 1, 500, 100] is
# [1, 2, 2, 2, 500, 500].
# Series.expanding() replaces the removed top-level pd.expanding_max/min.
expan_max = df['close'].expanding().max()
expan_min = df['close'].expanding().min()

# Channels for the long side: N1-bar high and N2-bar low.
# The filled result is assigned back instead of calling
# fillna(inplace=True) on df[...], which may act on a temporary copy.
df['n1_high'] = df['high'].rolling(window=N1, center=False).max().fillna(expan_max)
df['n2_low'] = df['low'].rolling(window=N2, center=False).min().fillna(expan_min)

# Channels for the short side: N1-bar low and N2-bar high.
df['n1_low'] = df['low'].rolling(window=N1, center=False).min().fillna(expan_min)
df['n2_high'] = df['high'].rolling(window=N2, center=False).max().fillna(expan_max)

# Build the breakout signal: a close above the N1-bar high is a buy signal.
# buy_index_in holds the index labels of the rows whose close exceeds
# n1_high; shift(1) moves the whole n1_high column down by one row.
buy_index_in = df[df['close'] > df['n1_high'].shift(1)].index
# 2014-05-28 210.24 212.77 205.26 -0.624 210.02 211.56 5496278
# 2014-05-29 210.24 212.49 207.72 0.000 210.57 210.24 3694596
# 2014-05-30 207.77 214.80 207.02 -1.175 210.30 210.24 5586068
# 2014-06-02 204.70 209.35 201.67 -1.478 207.33 207.77 4668115
# 2014-06-03 204.94 208.00 202.59 0.117 203.49 204.70 3866182
#
# date date_week atr21 atr14 key n1_high
# 2014-05-28 20140528 2 7.5100 7.5100 0 210.24
# 2014-05-29 20140529 3 6.0748 6.0421 1 210.24
# 2014-05-30 20140530 4 6.6981 6.7060 2 210.24
# 2014-06-02 20140602 0 7.2350 7.2763 3 210.24
# 2014-06-03 20140603 1 6.7973 6.7894 4 210.24

# Build the sell-side channel n2_low, the lowest low of the last N2 bars,
# in the same way. Rows without a full window are filled with the running
# minimum of the close.
# Series.rolling()/expanding() replace the removed pd.rolling_min and
# pd.expanding_min, and the filled result is assigned back instead of
# calling fillna(inplace=True) on tsla_df[...], which may act on a copy.
expan_min = tsla_df['close'].expanding().min()
tsla_df['n2_low'] = tsla_df['low'].rolling(window=N2).min().fillna(expan_min)

# Build the signal column from the breakout definition.
# A close above the previous row's n1_high is a buy signal (1).
buy_index = tsla_df[tsla_df['close'] > tsla_df['n1_high'].shift(1)].index
tsla_df.loc[buy_index, 'signal'] = 1

# A close below the previous row's n2_low is a sell signal (0).
sell_index = tsla_df[tsla_df['close'] < tsla_df['n2_low'].shift(1)].index
tsla_df.loc[sell_index, 'signal'] = 0

# Pie chart of how often each signal occurred; sample counts from one run:
tsla_df.signal.value_counts().plot(kind='pie', figsize=(5, 5))
# 1.0 54
# 0.0 53
# Name: signal, dtype: int64
def analyse(self):
    """Summarise the strategy's exit values and store the summary.

    Runs ``analyseStrategySql`` for the configured rules, exit and type,
    subtracts 0.2 from every ``ExitValue`` (stored as
    ``ExitValueAfterCosts``), derives summary statistics and the draw-down
    of the running sum, logs them, and replaces the matching strategy row
    through ``deleteStrategySql`` / ``insertStrategySql``.

    The loaded frame, extended by the columns ``Sum``, ``Max``, ``Min``
    and ``DD``, is kept in ``self._strategyData``.
    """
    # NOTE(review): the SQL statements here are assembled with ``%`` string
    # interpolation. If any rule/exit/type value can come from untrusted
    # input, switch the templates to parameterised queries.
    query = self.analyseStrategySql % (
        self._rule1, self._rule2, self._rule3, self._exit, self._type)

    connection = sqlite3.connect(pyswing.database.pySwingDatabase)
    try:
        self._strategyData = read_sql_query(query, connection, 'Date')
    finally:
        # Close the connection even when the query fails.
        connection.close()

    self._strategyData['ExitValueAfterCosts'] = self._strategyData['ExitValue'] - 0.2
    exitValues = self._strategyData['ExitValueAfterCosts']

    mean = exitValues.mean()
    median = exitValues.median()
    total = exitValues.sum()
    count = exitValues.count()
    # The trade count is spread over a fixed divisor of 10.
    tradesPerYear = count / 10
    sharpeRatio = sqrt(tradesPerYear) * mean / exitValues.std()

    # Series.expanding() replaces the removed expanding_sum/max/min helpers.
    runningSum = exitValues.expanding().sum()
    max2here = runningSum.expanding().max()
    self._strategyData["Sum"] = runningSum
    self._strategyData["Max"] = max2here
    self._strategyData["Min"] = runningSum.expanding().min()
    self._strategyData["DD"] = self._strategyData["Max"] - self._strategyData["Min"]

    # Largest drop of the running sum below its running maximum (<= 0).
    drawDown = (runningSum - max2here).min()

    Logger.log(
        logging.INFO, "Analysing Strategy", {
            "scope": __name__,
            "Rule 1": self._rule1,
            "Rule 2": self._rule2,
            "Rule 3": self._rule3,
            "Exit": self._exit,
            "Type": self._type,
            "Mean": str(mean),
            "Median": str(median),
            "Sum": str(total),
            "Count": str(count),
            "SharpeRatio": str(sharpeRatio),
            "DrawDown": str(drawDown)
        })

    strategyKey = (pyswing.globals.pySwingStrategy, self._rule1, self._rule2,
                   self._rule3, self._exit, self._type)
    deleteSql = self.deleteStrategySql % strategyKey
    insertSql = self.insertStrategySql % (strategyKey + (
        str(mean), str(median), str(total), str(count), str(sharpeRatio),
        str(drawDown)))

    connection = sqlite3.connect(pyswing.database.pySwingDatabase)
    try:
        c = connection.cursor()
        try:
            # Replace any previous result for this strategy combination.
            c.executescript(deleteSql)
            connection.commit()
            c.executescript(insertSql)
            connection.commit()
        finally:
            c.close()
    finally:
        connection.close()
if __name__ == '__main__':
    kl_pd = ABuSymbolPd.make_kl_df('TSLA', n_folds=2)

    # 1. N-day trend breakout: buy when the price exceeds the highest price
    #    of the last N1 days, sell when it falls below the lowest price of
    #    the last N2 days.
    N1 = 42
    N2 = 21

    # 2.1 A rolling max gives the highest value inside a fixed window.
    # 2.2 That leaves the first N1-1 rows as NaN. They are filled with the
    #     expanding max of the close, i.e. the largest close seen up to and
    #     including each row.
    #     Series.rolling()/expanding() replace the removed pd.rolling_* and
    #     pd.expanding_* helpers, and the filled result is assigned back
    #     instead of calling fillna(inplace=True) on kl_pd[...], which may
    #     act on a temporary copy.
    expan_max = kl_pd['close'].expanding().max()
    kl_pd['n1_high'] = kl_pd['high'].rolling(window=N1).max().fillna(expan_max)

    # 2.3 Same approach for the minimum.
    expan_min = kl_pd['close'].expanding().min()
    kl_pd['n2_low'] = kl_pd['low'].rolling(window=N2).min().fillna(expan_min)
    print(kl_pd.head())

    # 3.1 Derive the buy/sell signal series from n1_high and n2_low.
    #     Today's close is compared with yesterday's n1 value (shift(1)):
    #     today's own high must be excluded, because the close can never
    #     exceed the high of the same day.
    buy_signal = kl_pd[kl_pd.close > kl_pd.n1_high.shift(1)].index
    kl_pd.loc[buy_signal, 'signal'] = 1

    # 3.2 The sell signal from n2_low works the same way.
    sell_signal = kl_pd[kl_pd.close < kl_pd.n2_low.shift(1)].index
    kl_pd.loc[sell_signal, 'signal'] = 0

    # 3.3 NaN rows need no special handling here.
    kl_pd.signal.value_counts().plot(kind='pie')
    plt.show()

    # 4.1 Convert the buy/sell signals into a holding state.
def longsklearn(code='999999', ptype='f', dtype='d', start=None, end=None):
    """Plot the close of one instrument with rolling and expanding overlays.

    The price history is loaded through ``tdd``, local highs and lows are
    collected and filtered through ``LIS``, two linear trend lines are
    fitted, and the close is drawn together with rolling/expanding
    statistics in a figure that gets zoom and pan handlers attached.

    Args:
        code: Forwarded to ``tdd.get_tdx_append_now_df`` (presumably the
            instrument code).
        ptype: Forwarded to ``tdd.get_tdx_append_now_df``.
        dtype: Forwarded to ``tdd.get_tdx_stock_period_to_type`` to build
            the second frame whose ``close`` feeds the 5-period overlays.
        start: Forwarded to the loader; ``None`` is passed through as is.
        end: Forwarded to the loader; ``None`` is passed through as is.

    Returns:
        None. The function prints diagnostics and ends by calling ``show()``.
    """
    df = tdd.get_tdx_append_now_df(code, ptype, start, end).sort_index(ascending=True)
    dw = tdd.get_tdx_stock_period_to_type(df, dtype).sort_index(ascending=True)

    h = df.loc[:, ['open', 'close', 'high', 'low']]
    highp = h['high'].values
    lowp = h['low'].values
    closep = h['close'].values

    # Linear trend through every close, evaluated 200 points past the data.
    lr = LinearRegression()
    x = np.atleast_2d(np.linspace(0, len(closep), len(closep))).T
    lr.fit(x, closep)
    xt = np.atleast_2d(np.linspace(0, len(closep) + 200, len(closep) + 200)).T
    yt = lr.predict(xt)

    # Local lows (values bV at positions bP) and local highs (uV at uP).
    # The first and last bars are skipped because they lack one neighbour.
    bV, bP, uV, uP = [], [], [], []
    for i in range(1, len(highp) - 1):
        if lowp[i] <= lowp[i - 1] and lowp[i] < lowp[i + 1]:
            bV.append(lowp[i])
            bP.append(i)
        if highp[i] >= highp[i - 1] and highp[i] > highp[i + 1]:
            uV.append(highp[i])
            uP.append(i)

    print(highp)
    print("uV:%s" % uV[:1])
    print("uP:%s" % uP[:1])
    print("bV:%s" % bV[:1])
    print("bP:%s" % bP[:1])

    sV, sP = LIS(uV)
    dV, dP = LIS(bV)
    print("sV:%s" % sV[:1])
    print("sP:%s" % sP[:1])
    print("dV:%s" % dV[:1])
    print("dP:%s" % dP[:1])

    # sP / dP index into uV / bV; map them back to bar positions.
    sidx = [uP[p] for p in sP]
    didx = [bP[p] for p in dP]
    print("sidx:%s" % sidx[:1])
    print("didx:%s" % didx[:1])

    # Second trend line, fitted through the highs selected by LIS.
    lr = LinearRegression()
    X = np.atleast_2d(np.array(sidx)).T
    Y = np.array(sV)
    lr.fit(X, Y)
    estV = lr.predict(xt)

    fig = plt.figure(figsize=(16, 10), dpi=72)
    plt.subplots_adjust(left=0.05, bottom=0.08, right=0.95, top=0.95,
                        wspace=0.15, hspace=0.25)
    ax = fig.add_subplot(111)
    plt.grid(True)
    ax = h['close'].plot()

    # Mid price of every bar.
    df['mean'] = (df['high'] + df['low']) / 2.0
    print(df['mean'][:1])
    dw = dw.set_index('date')

    # The top-level pd.rolling_* / pd.expanding_* helpers no longer exist in
    # pandas; the method API below is the equivalent. The second positional
    # argument of the old expanding helpers was min_periods.
    period_close = dw['close']
    ax.plot(df.index, df['mean'].rolling(window=60).mean(), 'g', linewidth=1)
    ax.plot(dw.index, period_close.rolling(window=5).mean(), 'r', linewidth=1)
    ax.plot(dw.index, period_close.rolling(window=5).min(), 'bo')
    ax.plot(dw.index, period_close.rolling(window=5).max(), 'yo')
    ax.plot(dw.index, period_close.expanding(min_periods=5).max(), 'ro')
    ax.plot(dw.index, period_close.expanding(min_periods=5).min(), 'go')

    # The fitted trend lines are currently not drawn:
    # ax.plot(xt, estV, '-r', linewidth=5)
    # ax.plot(xt, yt, '-g', linewidth=5)

    scale = 1.1
    zp = zoompan.ZoomPan()
    # Keep the returned handlers referenced until show() runs.
    figZoom = zp.zoom_factory(ax, base_scale=scale)
    figPan = zp.pan_factory(ax)
    show()
# ========== Using the code list obtained in the previous step, iterate over
# every stock and merge them into a single table, all_stock.
all_stock = pd.DataFrame()

# Iterate over every stock in the list.
for code in stock_code_list:
    print(code)

    # Read this stock's data from its csv file.
    # NOTE: replace the path with the location of the data files on your machine.
    stock_data = pd.read_csv('trading-data@full/stock data/' + code + '.csv', parse_dates=[1])
    # Sort by trading date, oldest first (DataFrame.sort was removed;
    # sort_values is its replacement).
    stock_data.sort_values('date', inplace=True)

    # KDJ indicator: 9-row low/high channel. Rows without a full window are
    # filled with the running extreme seen so far.
    # Series.rolling()/expanding()/ewm() replace the removed pd.rolling_*,
    # pd.expanding_* and pd.ewma helpers.
    low_list = stock_data['low'].rolling(window=9).min()
    low_list = low_list.fillna(stock_data['low'].expanding().min())
    high_list = stock_data['high'].rolling(window=9).max()
    high_list = high_list.fillna(stock_data['high'].expanding().max())
    rsv = (stock_data['close'] - low_list) / (high_list - low_list) * 100
    stock_data['KDJ_K'] = rsv.ewm(com=2).mean()
    stock_data['KDJ_D'] = stock_data['KDJ_K'].ewm(com=2).mean()
    stock_data['KDJ_J'] = 3 * stock_data['KDJ_K'] - 2 * stock_data['KDJ_D']

    # Mark KDJ crossovers: K moving above D ("golden cross") and K moving
    # back to or below D ("death cross"). The first row has no previous
    # state (NaN after the shift), so neither comparison matches it.
    stock_data['KDJ_金叉死叉'] = ''
    kdj_position = stock_data['KDJ_K'] > stock_data['KDJ_D']
    previous_position = kdj_position.shift()
    golden_cross = kdj_position & (previous_position == False)
    death_cross = ~kdj_position & (previous_position == True)
    stock_data.loc[golden_cross, 'KDJ_金叉死叉'] = '金叉'
    stock_data.loc[death_cross, 'KDJ_金叉死叉'] = '死叉'

    # Forward returns over the next n trading days, from the adjusted price.
    for n in [1, 2, 3, 5, 10, 20]:
        future_price = stock_data['adjust_price'].shift(-1 * n)
        stock_data['接下来' + str(n) + '个交易日涨跌幅'] = future_price / stock_data['adjust_price'] - 1.0