Пример #1
0
def computeRunningMinMaxSignals(df, ema_feats_df, spike_5s, norm):
    """Add "distance from running extreme" features to ``ema_feats_df``.

    The running extreme of ``df['microprice_ema_200ms_ticks']`` (running
    maximum when ``spike_5s >= 0``, running minimum otherwise) is used as
    the maximum dislocation so far.  The maximum reversal is that level plus
    the largest move back from it seen so far.  Each of the 200 ms, 2000 ms
    and 10000 ms microprice EMAs is then passed, together with the reference
    level, to ``computeRelativeDislocation``.

    Args:
        df: frame holding the ``microprice_ema_{200,2000,10000}ms_ticks``
            columns.
        ema_feats_df: frame that receives the six feature columns; it is
            modified in place.
        spike_5s: signed spike size; only its sign is used here.
        norm: unused, kept so existing callers keep working.

    Returns:
        The tuple ``(ema_feats_df, df)``.
    """
    horizons = ('200ms', '2000ms', '10000ms')
    spike_is_up = spike_5s >= 0
    fast_ema = df['microprice_ema_200ms_ticks']

    # pd.expanding_max/min were removed from pandas; the .expanding()
    # accessor computes the same running extremes.
    if spike_is_up:
        max_disl = fast_ema.expanding().max()
    else:
        max_disl = fast_ema.expanding().min()

    for horizon in horizons:
        ema_feats_df['from_max_disl_ema_%s_ticks' % horizon] = computeRelativeDislocation(
            df['microprice_ema_%s_ticks' % horizon], max_disl, max_disl)

    # Offset of the fast EMA from the running extreme; its own running
    # extreme in the opposite direction is the largest reversal so far.
    rev_from_max_disl_to_here = fast_ema - max_disl
    if spike_is_up:
        max_rev = max_disl + rev_from_max_disl_to_here.expanding().min()
    else:
        max_rev = max_disl + rev_from_max_disl_to_here.expanding().max()

    for horizon in horizons:
        ema_feats_df['from_max_rev_ema_%s_ticks' % horizon] = computeRelativeDislocation(
            df['microprice_ema_%s_ticks' % horizon], max_rev, max_rev)

    return (ema_feats_df, df)
Пример #2
0
    def update_price(self, date, sid):
        """Build 5-minute and 1-minute bar fields for one instrument/day and upsert them.

        The price document for ``sid``/``date`` is read from
        ``self.collection``; its ``dvalue`` mapping is keyed by millisecond
        offsets, which are added to ``date`` (``%Y%m%d``) to form timestamps.
        For each bar size the fields ``into/inth/intl/intc`` (OHLC),
        ``lstl``/``lsth`` (running low/high so far), ``vlty`` (std within the
        bar) and ``lstp`` (last price) are computed; the 5-minute bars also
        get ``tvwp`` (mean within the bar).  Each field is upserted into
        ``self.db.IF_5min`` / ``self.db.IF_1min`` keyed by ``sid``, ``date``
        and the field name.

        Args:
            date: trading day as a ``%Y%m%d`` string.
            sid: instrument identifier used in the document keys.

        Returns:
            None.  When the price cannot be loaded, a warning is logged and
            nothing is written.
        """
        try:
            p = pd.Series(self.collection.find_one({'sid': sid, 'date': date, 'dname': 'price'})['dvalue'])
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except`` that
            # would also swallow KeyboardInterrupt/SystemExit.  The message is
            # formatted up front: the ``{}`` placeholders were previously
            # passed to the logger unfilled.
            self.logger.warning('No price found for {} on {}'.format(sid, date))
            return
        day_start = datetime.strptime(date, '%Y%m%d')
        p.index = [day_start + timedelta(milliseconds=int(s)) for s in p.index]

        running_low = p.expanding().min()
        running_high = p.expanding().max()

        def build_bars(rule, with_mean):
            """Return the bar-field frame for ``rule``, indexed by HHMMSS strings."""
            def bins(series):
                return series.resample(rule, label='right', closed='right')

            bars = bins(p).ohlc()
            bars.columns = ['into', 'inth', 'intl', 'intc']
            bars['lstl'] = bins(running_low).last()
            bars['lsth'] = bins(running_high).last()
            bars['vlty'] = bins(p).std()
            bars['lstp'] = bins(p).last()
            if with_mean:
                bars['tvwp'] = bins(p).mean()
            bars.index = [stamp.strftime('%H%M%S') for stamp in bars.index]
            return bars

        # reindex() keeps exactly the expected bar times, leaving NaN for
        # any bar that has no data (what the removed ``.ix[labels]`` did).
        df5 = build_bars('5min', True).reindex(times_5min)
        for dname, ser in df5.items():
            # NOTE(review): Collection.update is the legacy PyMongo call;
            # newer PyMongo releases expect update_one - confirm driver version.
            self.db.IF_5min.update({'sid': sid, 'date': date, 'dname': dname}, {'$set': {'dvalue': ser.to_dict()}}, upsert=True)

        # The 1-minute block used to assign the running max to 'lstl' a
        # second time, so 'lsth' was never written; build_bars fixes that.
        df1 = build_bars('1min', False).reindex(times_1min)
        for dname, ser in df1.items():
            self.db.IF_1min.update({'sid': sid, 'date': date, 'dname': dname}, {'$set': {'dvalue': ser.to_dict()}}, upsert=True)
Пример #3
0
def indicator_KDJ(stock_data):  # KDJ indicator calculation
    """Compute KDJ, mark K/D crosses, and return the golden-cross rows.

    ``stock_data`` is modified in place: the columns ``KDJ_K``, ``KDJ_D``,
    ``KDJ_J``, ``KDJ_金叉死叉`` and the forward-return columns for
    1/2/3/5/10/20 rows ahead are added, and every row containing a NaN is
    dropped (which removes the last 20 rows).

    Args:
        stock_data: DataFrame with ``low``, ``high`` and ``close`` columns.

    Returns:
        The rows flagged as golden cross (K above D today, not above D on
        the previous row), or ``None`` when there are none.
    """
    low, high, close = stock_data['low'], stock_data['high'], stock_data['close']

    # 9-row rolling extremes; the first 8 rows (NaN) fall back to the
    # running extreme so far.  pd.rolling_*/expanding_*/ewma were removed
    # from pandas, hence the accessor forms.
    low_list = low.rolling(9).min().fillna(low.expanding().min())
    high_list = high.rolling(9).max().fillna(high.expanding().max())
    rsv = (close - low_list) / (high_list - low_list) * 100

    stock_data['KDJ_K'] = rsv.ewm(com=2, adjust=False).mean()
    stock_data['KDJ_D'] = stock_data['KDJ_K'].ewm(com=2, adjust=False).mean()
    stock_data['KDJ_J'] = 3 * stock_data['KDJ_K'] - 2 * stock_data['KDJ_D']

    # Golden cross / dead cross flags.
    stock_data['KDJ_金叉死叉'] = ''
    k_above_d = stock_data['KDJ_K'] > stock_data['KDJ_D']
    previous = k_above_d.shift()  # first row is NaN, so it never counts as a cross
    stock_data.loc[k_above_d & previous.eq(False), 'KDJ_金叉死叉'] = '金叉'  # K<=D yesterday, K>D today
    stock_data.loc[~k_above_d & previous.eq(True), 'KDJ_金叉死叉'] = '死叉'

    # Forward returns over the next n rows, from the (adjusted) close.
    for n in [1, 2, 3, 5, 10, 20]:
        stock_data['接下来' + str(n) + '个交易日涨跌幅'] = close.shift(-n) / close - 1.0
    stock_data.dropna(how='any', inplace=True)  # drop every row that has a missing value

    golden = stock_data[stock_data['KDJ_金叉死叉'] == '金叉']
    if golden.empty:
        return None
    return golden
Пример #4
0
def get_kdj(code):
    """Fetch K-line data for ``code`` and flag whether the latest KDJ looks bullish.

    The data comes from ``ts.get_k_data(code)``.  KDJ is computed with a
    9-row window and ``com=2`` smoothing.  ``kdj_ok`` is 1 when the last J
    value is above the previous one and the last K value is below 20,
    otherwise 0 (also 0 when fewer than two rows are available, which used
    to raise).

    Args:
        code: instrument code passed through to ``ts.get_k_data``.

    Returns:
        The rows whose ``date`` string is greater than
        ``str(dc.get_the_day_before_today(1))``, with the ``kdj_k``,
        ``kdj_d``, ``kdj_j`` and ``kdj_ok`` columns added.
    """
    stock_data = ts.get_k_data(code)

    # pd.rolling_*/expanding_*/ewma were removed from pandas; the accessor
    # forms below compute the same values.
    low, high = stock_data['low'], stock_data['high']
    low_list = low.rolling(9).min().fillna(low.expanding().min())
    high_list = high.rolling(9).max().fillna(high.expanding().max())
    rsv = (stock_data['close'] - low_list) / (high_list - low_list) * 100
    stock_data['kdj_k'] = rsv.ewm(com=2).mean()
    stock_data['kdj_d'] = stock_data['kdj_k'].ewm(com=2).mean()
    stock_data['kdj_j'] = 3 * stock_data['kdj_k'] - 2 * stock_data['kdj_d']

    # Compare today's J with yesterday's.  Positional access (.iloc) is used
    # so the result does not depend on the frame having a 0..n-1 index.
    kdj_ok = 0
    if len(stock_data) >= 2:
        yesterday_j = stock_data['kdj_j'].iloc[-2]
        today_j = stock_data['kdj_j'].iloc[-1]
        today_k = stock_data['kdj_k'].iloc[-1]
        if today_j > yesterday_j and today_k < float(20):
            kdj_ok = 1

    # .copy() so that adding kdj_ok does not write into a slice of the full frame.
    stock_data = stock_data[stock_data.date > str(dc.get_the_day_before_today(1))].copy()
    stock_data['kdj_ok'] = kdj_ok
    return stock_data
Пример #5
0
def KDJ(date, N=9, M1=3, M2=3):
    """Add ``KDJ_K``, ``KDJ_D`` and ``KDJ_J`` columns to the frame ``date``.

    Despite its name, ``date`` is the price DataFrame; it is modified in
    place and nothing is returned.

    Args:
        date: DataFrame with ``low``, ``high`` and ``close`` columns.
        N: window length for the rolling low/high (the first ``N - 1`` rows
            fall back to the running low/high so far).
        M1: smoothing period for K, applied as ``com = M1 - 1``.
        M2: smoothing period for D, applied as ``com = M2 - 1``.

    With the defaults (``M1 = M2 = 3``) the smoothing is ``com=2``, the
    value that was previously hard-coded while ``M1``/``M2`` were ignored.
    """
    # The columns are looked up by the string names 'low'/'high'; the former
    # bare names ``low``/``high`` are not defined in this function.
    low, high = date['low'], date['high']
    low_list = low.rolling(N).min().fillna(low.expanding().min())
    high_list = high.rolling(N).max().fillna(high.expanding().max())
    rsv = (date['close'] - low_list) / (high_list - low_list) * 100
    date['KDJ_K'] = rsv.ewm(com=M1 - 1).mean()
    date['KDJ_D'] = date['KDJ_K'].ewm(com=M2 - 1).mean()
    date['KDJ_J'] = 3 * date['KDJ_K'] - 2 * date['KDJ_D']
Пример #6
0
def kdj(data, date, m1, m2):
    """Add KDJ working columns to ``data`` in place.

    Columns written: ``lown``/``highn`` (rolling low/high), ``rsv``,
    ``kdj_k``, ``kdj_d`` and ``kdj_j``.  Nothing is returned.

    Args:
        data: DataFrame with ``high``, ``low`` and ``close`` columns.
        date: window length, in rows, for the rolling low/high (the name is
            historical; it is not a date).
        m1: centre of mass (``com``) of the exponential average giving K.
        m2: centre of mass (``com``) of the exponential average giving D.
    """
    low, high = data['low'], data['high']
    # Rows before the window is full fall back to the running extreme so
    # far.  Assigning the filled series directly replaces the old
    # ``data.lown.fillna(..., inplace=True)``, which acted on a column
    # accessor, and the removed pd.rolling_*/expanding_*/ewma functions.
    data['lown'] = low.rolling(date).min().fillna(low.expanding().min())
    data['highn'] = high.rolling(date).max().fillna(high.expanding().max())
    data['rsv'] = (data['close'] - data['lown']) / (data['highn'] - data['lown']) * 100
    data['kdj_k'] = data['rsv'].ewm(com=m1).mean()
    data['kdj_d'] = data['kdj_k'].ewm(com=m2).mean()
    data['kdj_j'] = 3 * data['kdj_k'] - 2 * data['kdj_d']
Пример #7
0
def kdj(data, date, m1, m2):
    """Compute the KDJ oscillator and store it on ``data`` (in place).

    Writes the columns ``lown``, ``highn``, ``rsv``, ``kdj_k``, ``kdj_d``
    and ``kdj_j``; returns nothing.

    Args:
        data: DataFrame with ``high``, ``low`` and ``close`` columns.
        date: number of rows in the rolling low/high window (not a date,
            despite the name).
        m1: ``com`` of the exponential average applied to RSV to get K.
        m2: ``com`` of the exponential average applied to K to get D.
    """
    lows = data['low']
    highs = data['high']

    # pd.rolling_min/max, pd.expanding_min/max and pd.ewma no longer exist
    # in pandas; these accessor calls are the equivalents.  Until the window
    # is full the running low/high so far is used instead of NaN.
    window_low = lows.rolling(date).min()
    data['lown'] = window_low.fillna(lows.expanding().min())
    window_high = highs.rolling(date).max()
    data['highn'] = window_high.fillna(highs.expanding().max())

    price_range = data['highn'] - data['lown']
    data['rsv'] = (data['close'] - data['lown']) / price_range * 100
    data['kdj_k'] = data['rsv'].ewm(com=m1).mean()
    data['kdj_d'] = data['kdj_k'].ewm(com=m2).mean()
    data['kdj_j'] = 3 * data['kdj_k'] - 2 * data['kdj_d']
Пример #8
0
def analysis_kdjv2(code):
    """Back-test a KDJ cross strategy for one stock and write CSV reports.

    Reads ``./stockdata/data/last3year/<code>.csv``, computes KDJ (9-row
    window, ``com=3``), sets ``Signal`` to 1 on a golden cross and 0 on a
    dead cross, and derives ``position`` as the previous row's signal
    carried forward.  ``Cash_index`` is the cumulative product of
    ``p_change/100 * position + 1``.  The frame is handed to
    ``Make_decision``; the buy-signal rows, with forward returns attached,
    go to ``print_return_next_n_day``.  Three CSV files are written under
    ``./output/A/<code>/``.

    Args:
        code: stock code, used for the input file name and output directory.

    Returns:
        None.
    """
    filename = './stockdata/data/last3year/' + code + '.csv'
    stock_dataT = pd.read_csv(filename, parse_dates=['date'])
    # .copy() so the column assignments below never write into a view of
    # the frame that was read from disk.
    stock_data = stock_dataT.loc[:, ['date', 'high', 'low', 'close', 'p_change']].copy()

    # KDJ.  pd.rolling_*/expanding_*/ewma were removed from pandas; the
    # accessor forms compute the same values.
    low, high = stock_data['low'], stock_data['high']
    stock_data['low_list'] = low.rolling(9).min().fillna(low.expanding().min())
    stock_data['high_list'] = high.rolling(9).max().fillna(high.expanding().max())
    stock_data['rsv'] = (stock_data['close'] - stock_data['low_list']) / (stock_data['high_list'] - stock_data['low_list']) * 100
    stock_data['KDJ_K'] = stock_data['rsv'].ewm(com=3).mean()
    stock_data['KDJ_D'] = stock_data['KDJ_K'].ewm(com=3).mean()
    stock_data['KDJ_J'] = 3 * stock_data['KDJ_K'] - 2 * stock_data['KDJ_D']

    # Golden cross -> 1, dead cross -> 0.  In terms of sensitivity J is the
    # strongest, then K, then D; in terms of stability the order reverses.
    k_line, d_line = stock_data['KDJ_K'], stock_data['KDJ_D']
    # Create the column up front so it exists even when no cross occurs.
    stock_data['Signal'] = float('nan')
    stock_data.loc[(k_line > d_line) & (k_line.shift(1) < d_line.shift(1)), 'Signal'] = 1
    stock_data.loc[(k_line < d_line) & (k_line.shift(1) > d_line.shift(1)), 'Signal'] = 0

    # A signal takes effect on the following row and is held until the next one.
    stock_data['position'] = stock_data['Signal'].shift(1).ffill()

    # Cumulative return of holding the stock only while position is 1.
    stock_data['Cash_index'] = ((stock_data['p_change'] / 100) * stock_data['position'] + 1.0).cumprod()
    initial_idx = 1
    stock_data['Cash_index'] *= initial_idx
    print('The KDJ Backwards methon Signal:')
    Make_decision(stock_data)

    # Percentage return over the next n rows, from the (adjusted) close.
    for n in [1, 2, 3, 5, 10, 20]:
        stock_data['CP_next_' + str(n) + '_days'] = (stock_data['close'].shift(-1 * n) / stock_data['close'] - 1.0) * 100
    # Rows where a buy signal appeared, with all forward-return columns.
    dd = stock_data[stock_data['Signal'] == 1]
    print_return_next_n_day(dd)

    codedir = './output/A/' + code + os.sep
    if not os.path.exists(codedir):
        # makedirs also creates ./output/A when it does not exist yet.
        os.makedirs(codedir)

    # Yearly return of the stock itself and of the strategy.
    stock_data['p_change_KDJV2'] = (stock_data['p_change']) * stock_data['position']
    # NOTE(review): recent pandas prefers the 'YE' alias over 'A' - confirm target version.
    year_rtn = stock_data.set_index('date')[['p_change', 'p_change_KDJV2']].\
        resample('A').apply(lambda x: (x / 100 + 1.0).prod() - 1.0) * 100
    year_rtn.to_csv(codedir + 'kdjv2_year.csv', encoding='gbk')
    stock_data.to_csv(codedir + 'kdjv2.csv', encoding='gbk', index=False)
    stock_data.tail(20).to_csv(codedir + 'kdjv2_Signal.csv', encoding='gbk', index=False)
    print('the share %s trading sign for KDJV2:' % code)
    print(stock_data.tail(5))
    return
Пример #9
0
def kdj(stock):
    """Return the KDJ lines ``(k, d, j)`` for ``stock``.

    Args:
        stock: DataFrame exposing ``low``, ``high`` and ``close`` columns.

    Returns:
        Three Series.  ``k`` and ``d`` use a 9-row window and ``com=2``
        smoothing.  ``j`` is ``3 * k - 2 * d`` computed from the third row
        onwards, so its first two entries are NaN.
    """
    # pd.rolling_*/expanding_*/ewma were removed from pandas.  Rows before
    # the window is full fall back to the running low/high so far.
    low_list = stock.low.rolling(9).min().fillna(stock.low.expanding().min())
    high_list = stock.high.rolling(9).max().fillna(stock.high.expanding().max())
    rsv = (stock.close - low_list) / (high_list - low_list) * 100
    k = rsv.ewm(com=2).mean()
    d = k.ewm(com=2).mean()
    # Explicit positional slice; index alignment with ``d`` leaves NaN in
    # the two rows that were sliced away.
    j = 3 * k.iloc[2:] - 2 * d

    return k, d, j
Пример #10
0
def kdj(stock):
    """Compute the K, D and J lines of the KDJ oscillator.

    Args:
        stock: DataFrame with ``low``, ``high`` and ``close`` columns
            (accessed as attributes).

    Returns:
        ``(k, d, j)`` as Series.  ``j`` is built from ``k`` starting at the
        third row, so its first two values are NaN after alignment with ``d``.
    """
    lows, highs = stock.low, stock.high

    # The old pd.rolling_min/max, pd.expanding_min/max and pd.ewma helpers
    # no longer exist in pandas; these accessor calls replace them.
    lowest = lows.rolling(9).min()
    lowest = lowest.fillna(lows.expanding().min())    # warm-up rows: running low
    highest = highs.rolling(9).max()
    highest = highest.fillna(highs.expanding().max())  # warm-up rows: running high

    rsv = (stock.close - lowest) / (highest - lowest) * 100
    k = rsv.ewm(com=2).mean()
    d = k.ewm(com=2).mean()
    j = 3 * k.iloc[2:] - 2 * d

    return k, d, j
Пример #11
0
def get_kdj(stock_data):
    """Add rounded ``kdj_k``, ``kdj_d`` and ``kdj_j`` columns to ``stock_data``.

    Args:
        stock_data: DataFrame with ``low``, ``high`` and ``close`` columns;
            modified in place.

    Returns:
        The same ``stock_data`` frame.  K is rounded to 2 decimals before D
        is derived from it, and D before J.
    """
    # KDJ calculation.  pd.rolling_*/expanding_*/ewma were removed from
    # pandas; rows before the 9-row window is full use the running extreme.
    low, high = stock_data['low'], stock_data['high']
    low_list = low.rolling(9).min().fillna(low.expanding().min())
    high_list = high.rolling(9).max().fillna(high.expanding().max())
    rsv = (stock_data['close'] - low_list) / (high_list - low_list) * 100

    # Attach the KDJ values to stock_data.
    stock_data['kdj_k'] = rsv.ewm(com=2).mean().round(2)
    stock_data['kdj_d'] = stock_data['kdj_k'].ewm(com=2).mean().round(2)
    stock_data['kdj_j'] = (3 * stock_data['kdj_k'] - 2 * stock_data['kdj_d']).round(2)
    return stock_data
Пример #12
0
def calKDJ(data, N=0, M=0):
    """Compute the KDJ oscillator and its intermediate series.

    Args:
        data: DataFrame with ``low``, ``high`` and ``close`` columns (not
            modified).
        N: rolling window length in rows; ``0`` selects the default of 9.
        M: ``com`` of the exponential averages; ``0`` selects the default
            of 2.

    Returns:
        The tuple ``(low_list, high_list, rsv, KDJ_K, KDJ_D, KDJ_J)``.
    """
    N = 9 if N == 0 else N
    M = 2 if M == 0 else M

    # pd.rolling_*/expanding_*/ewma were removed from pandas.  Rows before
    # the window is full fall back to the running low/high so far.
    low_list = data['low'].rolling(N).min().fillna(data['low'].expanding().min())
    high_list = data['high'].rolling(N).max().fillna(data['high'].expanding().max())
    rsv = (data['close'] - low_list) / (high_list - low_list) * 100
    KDJ_K = rsv.ewm(com=M).mean()
    KDJ_D = KDJ_K.ewm(com=M).mean()
    KDJ_J = 3 * KDJ_K - 2 * KDJ_D
    return low_list, high_list, rsv, KDJ_K, KDJ_D, KDJ_J
Пример #13
0
    def analyse(self):
        """Evaluate the strategy's historic trades and store the summary row.

        Runs ``self.analyseStrategySql`` (filled with the three rules, the
        exit and the type), subtracts 0.2 from every ``ExitValue`` as costs,
        and computes mean, median, sum, count, a Sharpe-style ratio and the
        maximum draw-down of the running sum.  The per-trade running ``Sum``,
        ``Max``, ``Min`` and ``DD`` columns are kept on
        ``self._strategyData``.  The summary is logged and then written by
        running ``self.deleteStrategySql`` followed by
        ``self.insertStrategySql``.

        Returns:
            None.
        """
        # NOTE(review): the SQL templates are filled with % formatting; the
        # values are instance attributes, but confirm none of them can carry
        # untrusted text.
        connection = sqlite3.connect(pyswing.database.pySwingDatabase)
        try:
            query = self.analyseStrategySql % (self._rule1, self._rule2, self._rule3, self._exit, self._type)
            self._strategyData = read_sql_query(query, connection, 'Date')
        finally:
            # Close the connection even when the query fails.
            connection.close()
        self._strategyData['ExitValueAfterCosts'] = self._strategyData['ExitValue'] - 0.2

        exitValues = self._strategyData['ExitValueAfterCosts']

        mean = exitValues.mean()
        median = exitValues.median()
        total = exitValues.sum()  # no longer shadows the builtin ``sum``
        count = exitValues.count()

        # NOTE(review): dividing by 10 presumably assumes ten years of history - confirm.
        tradesPerYear = count / 10
        sharpeRatio = sqrt(tradesPerYear) * mean / exitValues.std()

        # expanding_sum/max/min were removed from pandas; the .expanding()
        # accessor gives the same running statistics.
        runningSum = exitValues.expanding().sum()
        runningMax = runningSum.expanding().max()
        self._strategyData["Sum"] = runningSum
        self._strategyData["Max"] = runningMax
        self._strategyData["Min"] = runningSum.expanding().min()
        self._strategyData["DD"] = self._strategyData["Max"] - self._strategyData["Min"]

        # Draw-down: deepest fall of the running sum below its running peak.
        drawDown = (runningSum - runningMax).min()

        Logger.log(logging.INFO, "Analysing Strategy", {"scope":__name__, "Rule 1":self._rule1, "Rule 2":self._rule2, "Rule 3":self._rule3, "Exit":self._exit, "Type":self._type, "Mean":str(mean), "Median":str(median), "Sum":str(total), "Count":str(count), "SharpeRatio":str(sharpeRatio), "DrawDown":str(drawDown)})

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)
        try:
            c = connection.cursor()
            try:
                deleteSql = self.deleteStrategySql % (pyswing.globals.pySwingStrategy, self._rule1, self._rule2, self._rule3, self._exit, self._type)
                c.executescript(deleteSql)
                connection.commit()

                insertSql = self.insertStrategySql % (pyswing.globals.pySwingStrategy, self._rule1, self._rule2, self._rule3, self._exit, self._type, str(mean), str(median), str(total), str(count), str(sharpeRatio), str(drawDown))
                c.executescript(insertSql)
                connection.commit()
            finally:
                c.close()
        finally:
            connection.close()
Пример #14
0
def get_kdj_history(code):
    """Evaluate the KDJ buy condition five rows before the end of the history.

    K-line data comes from ``ts.get_k_data(code)``; KDJ uses a 9-row window,
    ``com=2`` smoothing and 2-decimal rounding.  The row five from the end
    is treated as "today" and the one before it as "yesterday".

    Args:
        code: instrument code passed through to ``ts.get_k_data``.

    Returns:
        A one-row DataFrame (empty if there is no data):

        * fewer than 6 rows: the last row with ``kdj_k`` and ``kdj_ok`` set
          to 0;
        * J rose and K is below 20 on "today": the "today" row with
          ``kdj_ok`` = 1 plus ``highPercent`` (percentage gain from its close
          to the highest high of the last five rows), ``highDate`` (date of
          that high) and ``highDays`` (its 1-based position in those rows);
        * otherwise: the last row with ``kdj_ok`` = 0.
    """
    stock_data = ts.get_k_data(code)

    # pd.rolling_*/expanding_*/ewma were removed from pandas; the accessor
    # forms compute the same values.
    low, high = stock_data['low'], stock_data['high']
    low_list = low.rolling(9).min().fillna(low.expanding().min())
    high_list = high.rolling(9).max().fillna(high.expanding().max())
    rsv = (stock_data['close'] - low_list) / (high_list - low_list) * 100
    stock_data['kdj_k'] = rsv.ewm(com=2).mean().round(2)
    stock_data['kdj_d'] = stock_data['kdj_k'].ewm(com=2).mean().round(2)
    stock_data['kdj_j'] = (3 * stock_data['kdj_k'] - 2 * stock_data['kdj_d']).round(2)

    kdj_j = stock_data['kdj_j']
    if kdj_j.size < 6:
        # .copy() so the assignments do not write into a slice of the full frame.
        stock_data = stock_data.tail(1).copy()
        stock_data['kdj_k'] = 0
        stock_data['kdj_ok'] = 0
        return stock_data

    # Positional access, so the result does not depend on a 0..n-1 index.
    yesterdayJ = kdj_j.iloc[-6]
    todayJ = kdj_j.iloc[-5]
    todayK = stock_data['kdj_k'].iloc[-5]

    # Continue only when today's J is above yesterday's and K is oversold.
    if todayJ > yesterdayJ and todayK < float(20):
        # Highest high within the five rows starting at "today".
        last_five = stock_data.tail(5).copy()
        last_five['indexNum'] = [1, 2, 3, 4, 5]
        peak = last_five.sort_values('high').tail(1)
        maxValue = peak.high.values[0]

        stock_data = stock_data.tail(5).head(1).copy()
        stock_data['kdj_ok'] = 1
        highPercent = maxValue / stock_data.close.values[0]
        stock_data['highPercent'] = (round(highPercent, 3) * 100) - 100
        stock_data['highDate'] = peak.date.values[0]
        stock_data['highDays'] = peak.indexNum.values[0]
    else:
        stock_data = stock_data.tail(1).copy()
        stock_data['kdj_ok'] = 0
    return stock_data
Пример #15
0
 def expanding_smoother(self, data, stype='rolling_mean', min_periods=None, freq=None):
     """
     Apply an expanding-window statistic to ``data``.

     For background see http://pandas.pydata.org/pandas-docs/dev/computation.html

     :param data: pandas Series or DataFrame to smooth
     :param stype: statistic to compute, one of the names below; an optional
         ``expanding_`` prefix is accepted
     :param min_periods: minimum number of observations required for a
         value; ``None`` means pandas' expanding default of 1
     :param freq: optional frequency; when given, ``data`` is first
         resampled to it with ``mean`` (the replacement pandas documents for
         the removed ``freq`` keyword)
     :raises ValueError: if ``stype`` is not a supported statistic.  This
         includes the default ``'rolling_mean'``, which never matched any
         branch and used to fail with ``UnboundLocalError``.

     Supported statistics:
     count	Number of non-null observations
     sum	Sum of values
     mean	Mean of values
     median	Arithmetic median of values
     min	Minimum
     max	Maximum
     std	Unbiased standard deviation
     var	Unbiased variance
     skew	Unbiased skewness (3rd moment)
     kurt	Unbiased kurtosis (4th moment)

     """
     supported = ('count', 'sum', 'mean', 'median', 'min', 'max',
                  'std', 'var', 'skew', 'kurt')
     prefix = 'expanding_'
     statistic = stype[len(prefix):] if stype.startswith(prefix) else stype
     if statistic not in supported:
         raise ValueError('unsupported stype %r; expected one of %s'
                          % (stype, ', '.join(supported)))

     if freq is not None:
         data = data.resample(freq).mean()
     if min_periods is None:
         min_periods = 1

     # pd.expanding_* were removed from pandas; every supported statistic is
     # a method of the same name on the .expanding() window object.
     window = data.expanding(min_periods=min_periods)
     return getattr(window, statistic)()
def lm_kdj(df, n, ksgn='close'):
    '''
    Add the KDJ lines to ``df``.

    Input:
        df, pandas DataFrame with ``low`` and ``high`` columns plus the
            price column named by ``ksgn``
        n, window length in rows for the rolling low/high
        ksgn, name of the price column, usually ``close``
    Output:
        df, the same DataFrame with the columns ``k``, ``d`` and ``j`` added
    '''
    # pd.rolling_*/expanding_*/ewma were removed from pandas.  Rows before
    # the window is full fall back to the running low/high so far.
    lowList = df['low'].rolling(n).min().fillna(df['low'].expanding().min())
    highList = df['high'].rolling(n).max().fillna(df['high'].expanding().max())
    rsv = (df[ksgn] - lowList) / (highList - lowList) * 100

    df['k'] = rsv.ewm(com=2).mean()
    df['d'] = df['k'].ewm(com=2).mean()
    df['j'] = 3.0 * df['k'] - 2.0 * df['d']
    return df
Пример #17
0
    def computeRunningMinMaxSignals(self, df, start_dt, spike_ticks, spike_5s_pred):
        """Record the extreme microprice since just before the event at the current row.

        The window runs from the row stamped 2.5 seconds before ``start_dt``
        up to and including row ``self.cur_loc``.  Its maximum and minimum of
        ``microprice_ema_200ms`` are converted with ``self.priceTicks``; the
        maximum is kept when the average of ``spike_ticks`` and
        ``spike_5s_pred`` is non-negative, the minimum otherwise.

        Args:
            df: frame with a ``microprice_ema_200ms`` column whose index
                contains ``start_dt - 2.5s``.
            start_dt: event start time.
            spike_ticks: observed spike, signed.
            spike_5s_pred: predicted 5-second spike, signed.

        Returns:
            ``df``, with ``max_disl_ema_200ms_ticks`` set on the row at
            position ``self.cur_loc``.
        """
        pre_event_snapshot_dt = start_dt - timedelta(seconds=2.5)
        pre_event_snapshot_loc = df.index.get_loc(pre_event_snapshot_dt)

        # Positional slice (what the removed ``.ix`` did with integer bounds
        # here).  The last value of a running max/min is just the max/min of
        # the window, so no expanding series is built.
        window = df['microprice_ema_200ms'].iloc[pre_event_snapshot_loc:self.cur_loc + 1]
        max_to_here_ticks = self.priceTicks(window.max())
        min_to_here_ticks = self.priceTicks(window.min())

        if (spike_ticks + spike_5s_pred) / 2.0 >= 0:
            max_disl = max_to_here_ticks
        else:
            max_disl = min_to_here_ticks

        # NOTE(review): assumes self.cur_loc is an integer row position - confirm.
        df.loc[df.index[self.cur_loc], 'max_disl_ema_200ms_ticks'] = max_disl

        return df
def indicator_KDJ(stock_data):  # KDJ indicator calculation
    """Add KDJ and cross flags to ``stock_data`` and return its golden-cross rows.

    Side effects on ``stock_data``: the columns ``KDJ_K``, ``KDJ_D``,
    ``KDJ_J``, ``KDJ_金叉死叉`` and six forward-return columns are added,
    then all rows with any NaN are dropped in place.

    Args:
        stock_data: DataFrame with ``low``, ``high`` and ``close`` columns.

    Returns:
        The golden-cross rows, or ``None`` if there are none.
    """
    # KDJ: a 9-row period; the first 8 rolling values are NaN and are
    # replaced with the running min/max so far.  The pd.rolling_*,
    # pd.expanding_* and pd.ewma helpers no longer exist in pandas.
    lows = stock_data['low']
    highs = stock_data['high']
    lowest = lows.rolling(9).min()
    lowest = lowest.fillna(lows.expanding().min())
    highest = highs.rolling(9).max()
    highest = highest.fillna(highs.expanding().max())
    rsv = (stock_data['close'] - lowest) / (highest - lowest) * 100
    stock_data['KDJ_K'] = rsv.ewm(com=2, adjust=False).mean()
    stock_data['KDJ_D'] = stock_data['KDJ_K'].ewm(com=2, adjust=False).mean()
    stock_data['KDJ_J'] = 3 * stock_data['KDJ_K'] - 2 * stock_data['KDJ_D']

    # Golden cross: K at or below D on the previous row, above D now.
    # Dead cross: the reverse.  The shifted first row is NaN and matches neither.
    stock_data['KDJ_金叉死叉'] = ''
    above = stock_data['KDJ_K'] > stock_data['KDJ_D']
    above_before = above.shift()
    golden_mask = above & above_before.eq(False)
    dead_mask = ~above & above_before.eq(True)
    stock_data.loc[golden_mask, 'KDJ_金叉死叉'] = '金叉'
    stock_data.loc[dead_mask, 'KDJ_金叉死叉'] = '死叉'

    # Returns over the following n rows, from the (adjusted) close.
    for n in [1, 2, 3, 5, 10, 20]:
        stock_data['接下来' + str(n) + '个交易日涨跌幅'] = stock_data['close'].shift(-1 * n) / stock_data['close'] - 1.0
    stock_data.dropna(how='any', inplace=True)  # drop rows with any missing value

    # Keep only the golden-cross rows.
    stock_data = stock_data[stock_data['KDJ_金叉死叉'] == '金叉']
    if not stock_data.empty:
        return stock_data
    return None
Пример #19
0
    def plot_Ndays_Break(self, stock_df):
        """Find closes that break the previous N-day high/low channel.

        ``stock_df`` receives two columns: ``N1_High`` (highest ``High`` of
        the previous 42 rows) and ``N2_Low`` (lowest ``Low`` of the previous
        30 rows).  Rows without a full look-back are filled with the running
        max/min of ``Close`` so far.

        Args:
            stock_df: DataFrame with ``High``, ``Low`` and ``Close`` columns;
                its index entries must support ``strftime``.

        Returns:
            ``(break_pd, dispCont_List)``: the rows where a break occurred
            (in order, a row appearing once per break) and one description
            string per break.
        """
        N1 = 42
        N2 = 30

        # Highest high of the previous N1 rows.  shift(1) excludes today;
        # warm-up rows use the running max of Close.  pd.rolling_* and
        # pd.expanding_* were removed from pandas.
        expan_max = stock_df.Close.expanding().max()
        stock_df['N1_High'] = stock_df.High.rolling(window=N1).max().shift(1).fillna(expan_max)

        # Lowest low of the previous N2 rows, same treatment.
        expan_min = stock_df.Close.expanding().min()
        stock_df['N2_Low'] = stock_df.Low.rolling(window=N2).min().shift(1).fillna(expan_min)

        dispCont_List = []
        break_rows = []

        for kl_index in range(stock_df.shape[0]):
            today = stock_df.iloc[kl_index]
            day_text = stock_df.index[kl_index].strftime('%Y-%m-%d')
            # Close below the N2-day low: downward break (sell).
            if today['Close'] < today['N2_Low']:
                break_rows.append(today)
                dispCont_List.append(
                    "向下突破:" + day_text + ',' + str(today['Close']) + '\n')
            # Close above the N1-day high: upward break (buy).
            if today['Close'] > today['N1_High']:
                break_rows.append(today)
                dispCont_List.append(
                    "向上突破:" + day_text + ',' + str(today['Close']) + '\n')

        # Build the frame once: DataFrame.append was removed from pandas and
        # re-copied the whole frame on every call.
        break_pd = pd.DataFrame(break_rows) if break_rows else pd.DataFrame()

        return break_pd, dispCont_List
Пример #20
0

# ========== Compute the buy/sell points of the turtle trading rule
# The rule has two parameters: buy when the close is above the highest high
# of the last N1 days, sell when the close is below the lowest low of the
# last N2 days.  Both can be tuned, but usually N1 > N2.
N1 = 20
N2 = 10

# Highest high of the last N1 trading days.  Rows with fewer than N1 days of
# history use the highest high since listing.  (pd.rolling_max and
# pd.expanding_max were removed from pandas.)
index_data['最近N1个交易日的最高点'] = index_data['high'].rolling(N1).max().fillna(
    index_data['high'].expanding().max())

# Lowest low of the last N2 trading days, computed the same way.  The window
# is N2 here; it was previously N1, which made N2 unused.
index_data['最近N2个交易日的最低点'] = index_data['low'].rolling(N2).min().fillna(
    index_data['low'].expanding().min())

# When today's close > yesterday's N1-day high, set the close signal to 1
buy_index = index_data[index_data['close'] > index_data['最近N1个交易日的最高点'].shift(1)].index
index_data.loc[buy_index, '收盘发出的信号'] = 1
# When today's close < yesterday's N2-day low, set the close signal to 0
sell_index = index_data[index_data['close'] < index_data['最近N2个交易日的最低点'].shift(1)].index
index_data.loc[sell_index, '收盘发出的信号'] = 0

# Daily position: 1 while the index is held, 0 while it is not.  A signal
# takes effect on the following day and is carried forward until the next one.
index_data['当天的仓位'] = index_data['收盘发出的信号'].shift(1).ffill()

# Keep only the data from 1993-01-01 onwards, dropping the earlier rows
index_data = index_data[index_data['date'] >= pd.to_datetime('19930101')]
     data1['fast_line'] = pd.rolling_mean(data1['close'], h)
     data1['slow_line'] = pd.rolling_mean(data1['close'], k)
     data1['fast_line'] = data1['fast_line'].fillna(
         value=pd.expanding_mean(data1['close']))
     data1['slow_line'] = data1['slow_line'].fillna(
         value=pd.expanding_mean(data1['close']))
     data1['dist_%s_%s' % (k, h)] = data1['fast_line'] - data1['slow_line']
 for h in range(10, 26, 5):
     # Channel over the previous h rows: highest high / lowest low, with
     # today excluded by shift(1).  Rows without a full look-back fall back
     # to the running high/low so far.  pd.rolling_* and pd.expanding_* were
     # removed from pandas; the former '' placeholder assignments were
     # overwritten immediately and are dropped.
     data1['fast_line'] = data1['high'].shift(1).rolling(h).max().fillna(
         data1['high'].expanding().max())
     data1['slow_line'] = data1['low'].shift(1).rolling(h).min().fillna(
         data1['low'].expanding().min())
     # Distance of today's high/low from the channel edges.
     data1['dist_high_%s' % h] = data1['high'] - data1['fast_line']
     data1['dist_low_%s' % h] = data1['low'] - data1['slow_line']
 data1 = MACD(data1, 12, 26, 9)
 # Second data set: load it, move the 'time' index back into a column and
 # add the one-row log return (0 for the first row).
 data2 = pd.read_csv('rb888_2017.csv', parse_dates=True, index_col='time')
 data2.reset_index(inplace=True)
 data2['log_return'] = np.log(data2['close'] / data2['close'].shift(1))
 data2['log_return'] = data2['log_return'].fillna(0)
 for h, k in [(5, 20), (15, 20), (5, 10), (5, 15), (10, 15)]:
     data2['fast_line'] = ''
     data2['slow_line'] = ''
     data2['fast_line'] = pd.rolling_mean(data2['close'], h)
     data2['slow_line'] = pd.rolling_mean(data2['close'], k)
     data2['fast_line'] = data2['fast_line'].fillna(
         value=pd.expanding_mean(data2['close']))
     data2['slow_line'] = data2['slow_line'].fillna(
Пример #22
0
这两个参数可以自行调整大小,但是一般N1 > N2
'''
N1 = 20
N2 = 10

# MaxIn_N1: highest high of the last N1 trading days.  Rows with fewer than
# N1 days of history use the highest high since listing.  (The module-level
# rolling_max/expanding_max helpers were removed from pandas.)
index_data['MaxIn_N1'] = index_data['High'].rolling(N1).max().fillna(
    index_data['High'].expanding().max())

# MinIn_N2: lowest low of the last N2 trading days.  The window is N2 here;
# it was previously N1, which left N2 unused.
index_data['MinIn_N2'] = index_data['Low'].rolling(N2).min().fillna(
    index_data['Low'].expanding().min())

# When today's close > yesterday's MaxIn_N1, set the close signal to 1
buy_index = index_data[index_data['Close'] > index_data['MaxIn_N1'].shift(1)].index
index_data.loc[buy_index, '收盘发出的信号'] = 1

# When today's close < yesterday's MinIn_N2, set the close signal to 0
sell_index = index_data[index_data['Close'] < index_data['MinIn_N2'].shift(1)].index
index_data.loc[sell_index, '收盘发出的信号'] = 0


'''
The following sources are used for the test of the above function
# 计算每天的仓位,当天持有上证指数时,仓位为1,当天不持有上证指数时,仓位为0
index_data['当天的仓位'] = index_data['收盘发出的信号'].shift(1)
index_data['当天的仓位'].fillna(method='ffill', inplace=True)
Пример #23
0
    data['fast_line']=pd.rolling_mean(data['open'],h)
    data['slow_line']=pd.rolling_mean(data['open'],k)
    data['fast_line']=data['fast_line'].fillna(value=pd.expanding_mean(data['open']))
    data['slow_line']=data['slow_line'].fillna(value=pd.expanding_mean(data['open']))
    data['dist_%s_%s'%(k,h)]=data['fast_line']-data['slow_line']
# Moving-average factors: the i-row mean of the open minus the open itself
# (rows without a full window use 0 for the mean).  pd.rolling_* and
# pd.expanding_* were removed from pandas.
for i in range(5, 31, 5):
    data['MA_%s' % i] = data['open'].rolling(i).mean()
    data['MA_%s' % i] = data['MA_%s' % i].fillna(0) - data['open']
data = MACD(data, 12, 26, 9)
# Channel factors: distance of today's high/low from the highest high /
# lowest low of the previous h rows (running extreme while warming up).
for h in range(10, 26, 5):
    data['fast_line'] = data['high'].shift(1).rolling(h).max().fillna(data['high'].expanding().max())
    data['slow_line'] = data['low'].shift(1).rolling(h).min().fillna(data['low'].expanding().min())
    data['dist_high_%s' % h] = data['high'] - data['fast_line']
    data['dist_low_%s' % h] = data['low'] - data['slow_line']
# Fit a hidden Markov model on each factor and plot the inferred states
factor_list=['close','volume','dist_10_5','dist_15_5','dist_20_5','dist_15_10','dist_20_10','dist_20_15','dist_30_15','log_return','log_return_5','MACD','dist_high_10','dist_high_15','dist_high_20','dist_high_25','dist_low_10','dist_low_15','dist_low_20','dist_low_25','MA_5','MA_10','MA_15','MA_20','MA_25','MA_30']
for i in factor_list:
    X = np.column_stack([data[i]])
    model = GaussianHMM(n_components=3, covariance_type="diag", n_iter=1000,random_state=0).fit(X)
    hidden_states = model.predict(X)
    plt.figure(figsize=(15, 8))
    # One scatter series of close prices per hidden state.
    for k in range(model.n_components):
        idx = (hidden_states == k)
        plt.plot_date(data['time'][idx], data['close'][idx], '.', label='%dth hidden state' % k, lw=1)
    plt.legend()
    plt.grid(1)
    plt.savefig('C:/Users/Public/Documents/Python Scripts/隐马尔科夫状态刻画图集开盘价(2015)/%s.png'%(i))
    # Release the figure; otherwise one figure per factor stays open.
    plt.close()
Пример #24
0
# Collect the stock codes from the CSV file names under the data directory.
for root, dirs, files in os.walk('all_trading_data/stock data'):
    stock_code_list.extend(f.split('.csv')[0] for f in files if '.csv' in f)

all_stock = pd.DataFrame()
for code in stock_code_list:
    print(code)

    stock_data = pd.read_csv('all_trading_data/stock data/' + code + '.csv',
                             parse_dates=[1])
    # DataFrame.sort was removed from pandas; sort_values is the replacement.
    stock_data.sort_values('date', inplace=True)

    # KDJ with a 9-row window; rows before the window is full use the
    # running low/high so far.  pd.rolling_*/expanding_*/ewma were removed.
    low_list = stock_data['low'].rolling(9).min().fillna(stock_data['low'].expanding().min())
    high_list = stock_data['high'].rolling(9).max().fillna(stock_data['high'].expanding().max())
    rsv = (stock_data['close'] - low_list) / (high_list - low_list) * 100
    stock_data['KDJ_K'] = rsv.ewm(com=2).mean()
    stock_data['KDJ_D'] = stock_data['KDJ_K'].ewm(com=2).mean()
    stock_data['KDJ_J'] = 3 * stock_data['KDJ_K'] - 2 * stock_data['KDJ_D']

    # Golden cross: K not above D on the previous row, above D now; dead
    # cross: the reverse.  ``shift`` was misspelled ``shitf`` before, which
    # raised AttributeError.
    stock_data['KDJ_金叉死叉'] = ''
    kdj_position = stock_data['KDJ_K'] > stock_data['KDJ_D']
    previous_position = kdj_position.shift()
    stock_data.loc[kdj_position & previous_position.eq(False),
                   'KDJ_金叉死叉'] = '金叉'
    stock_data.loc[~kdj_position & previous_position.eq(True),
                   'KDJ_金叉死叉'] = '死叉'
Пример #25
0
 def LLV(self, param):
     """Return the lowest value over a window.

     :param param: sequence ``(series, window)``; a ``window`` of 0 means
         "since the first row" (running minimum), any other value is the
         rolling window length in rows
     :return: Series of minima; with a rolling window the first
         ``window - 1`` entries are NaN
     """
     series, window = param[0], param[1]
     # pd.expanding_min / pd.rolling_min were removed from pandas.
     if window == 0:
         return series.expanding().min()
     return series.rolling(window).min()
Пример #26
0
def longsklearn(code='999999', ptype='f', dtype='d', start=None, end=None):
    """Plot close prices with rolling/expanding envelopes in a zoomable figure.

    Bars are loaded with ``tdd.get_tdx_append_now_df(code, ptype, start, end)``
    and a second frame ``dw`` is derived from them with
    ``tdd.get_tdx_stock_period_to_type(df, dtype)`` (presumably a conversion
    to the ``dtype`` period -- confirm against ``tdd``).  The function prints
    a few diagnostics, draws the chart and then calls ``show()``; it returns
    ``None``.

    Side effect: a ``'mean'`` column, (high + low) / 2, is added to ``df``.

    Written so that it runs unchanged on Python 2 and Python 3: ``print`` is
    always called with a single parenthesised argument and no lazy ``map``
    object is assigned to a column.
    """
    df = tdd.get_tdx_append_now_df(code, ptype, start, end).sort_index(ascending=True)
    dw = tdd.get_tdx_stock_period_to_type(df, dtype).sort_index(ascending=True)
    h = df.loc[:, ['open', 'close', 'high', 'low']]
    highp = h['high'].values
    lowp = h['low'].values
    closep = h['close'].values

    # Linear trend of the close against bar position, extended 200 points
    # ahead.  It is not drawn at present; the fit is kept so that fitting
    # errors surface exactly as before.
    lr = LinearRegression()
    x = np.atleast_2d(np.linspace(0, len(closep), len(closep))).T
    lr.fit(x, closep)
    xt = np.atleast_2d(np.linspace(0, len(closep) + 200, len(closep) + 200)).T
    yt = lr.predict(xt)

    # Local troughs of the low (bP/bV) and local peaks of the high (uP/uV):
    # positions and values of interior bars that are <= (>=) the previous bar
    # and strictly below (above) the next one.
    interior = range(1, len(highp) - 1)
    bP = [i for i in interior
          if lowp[i] <= lowp[i - 1] and lowp[i] < lowp[i + 1]]
    bV = [lowp[i] for i in bP]
    uP = [i for i in interior
          if highp[i] >= highp[i - 1] and highp[i] > highp[i + 1]]
    uV = [highp[i] for i in uP]
    print(highp)
    print("uV:%s" % uV[:1])
    print("uP:%s" % uP[:1])
    print("bV:%s" % bV[:1])
    print("bP:%s" % bP[:1])

    # LIS returns (values, positions-into-its-input); map the positions back
    # to bar positions through uP / bP.
    sV, sP = LIS(uV)
    dV, dP = LIS(bV)
    print("sV:%s" % sV[:1])
    print("sP:%s" % sP[:1])
    print("dV:%s" % dV[:1])
    print("dP:%s" % dP[:1])
    sidx = [uP[pos] for pos in sP]
    didx = [bP[pos] for pos in dP]

    print("sidx:%s" % sidx[:1])
    print("didx:%s" % didx[:1])

    # Trend through the selected peaks (also not drawn at present).
    lr = LinearRegression()
    X = np.atleast_2d(np.array(sidx)).T
    Y = np.array(sV)
    lr.fit(X, Y)
    estV = lr.predict(xt)

    fig = plt.figure(figsize=(16, 10), dpi=72)
    plt.subplots_adjust(left=0.05, bottom=0.08, right=0.95, top=0.95, wspace=0.15, hspace=0.25)
    ax = fig.add_subplot(111)
    plt.grid(True)
    ax = h['close'].plot()

    df['mean'] = [(hi + lo) / 2
                  for hi, lo in zip(df.high.values, df.low.values)]
    print(df['mean'][:1])
    dw = dw.set_index('date')

    # NOTE(review): pd.rolling_* / pd.expanding_* are legacy pandas functions
    # -- confirm the pinned pandas version.  The second positional argument of
    # the expanding_* calls is min_periods.
    ax.plot(df.index, pd.rolling_mean(df['mean'], 60), 'g', linewidth=1)
    ax.plot(dw.index, pd.rolling_mean(dw.close, 5), 'r', linewidth=1)
    ax.plot(dw.index, pd.rolling_min(dw.close, 5), 'bo')
    ax.plot(dw.index, pd.rolling_max(dw.close, 5), 'yo')
    ax.plot(dw.index, pd.expanding_max(dw.close, 5), 'ro')
    ax.plot(dw.index, pd.expanding_min(dw.close, 5), 'go')

    # Install zoom and pan handlers on the axes; the returned callbacks are
    # kept referenced until show() returns.
    scale = 1.1
    zp = zoompan.ZoomPan()
    figZoom = zp.zoom_factory(ax, base_scale=scale)
    figPan = zp.pan_factory(ax)
    show()
Пример #27
0

# ========== Compute the buy/sell points of the Turtle Trading rules
# The rules take two parameters: buy when the close exceeds the highest price
# of the last N1 days, sell when the close falls below the lowest price of the
# last N2 days.
# Both parameters can be tuned, but usually N1 > N2.
N1 = 20
N2 = 10

# Highest price over the last N1 trading days, via rolling_max
index_data['最近N1个交易日的最高点'] = pd.rolling_max(index_data['high'], N1)
# For rows with fewer than N1 days of history, use the highest price so far
index_data['最近N1个交易日的最高点'].fillna(value=pd.expanding_max(index_data['high']), inplace=True)

# Lowest price over the last N2 trading days, computed the same way.
# (Fixed: the window was N1 although the column and the rule are about N2.)
index_data['最近N2个交易日的最低点'] = pd.rolling_min(index_data['low'], N2)
index_data['最近N2个交易日的最低点'].fillna(value=pd.expanding_min(index_data['low']), inplace=True)

# When today's close > yesterday's N1-day high, set the closing signal to 1
buy_index = index_data[index_data['close'] > index_data['最近N1个交易日的最高点'].shift(1)].index
index_data.loc[buy_index, '收盘发出的信号'] = 1
# When today's close < yesterday's N2-day low, set the closing signal to 0
sell_index = index_data[index_data['close'] < index_data['最近N2个交易日的最低点'].shift(1)].index
index_data.loc[sell_index, '收盘发出的信号'] = 0

# Daily position: 1 while holding the index, 0 while not holding it. The
# signal issued at one close takes effect on the next row; gaps are
# forward-filled with the last known position.
index_data['当天的仓位'] = index_data['收盘发出的信号'].shift(1)
index_data['当天的仓位'].fillna(method='ffill', inplace=True)

# Keep only the data after 1992 and drop the earlier rows
index_data = index_data[index_data['date'] >= pd.to_datetime('19930101')]
Пример #28
0
def comput_idicators(df,
                     trading_days,
                     required,
                     save_file,
                     save_address,
                     whole=1):
    """Compute expanding performance indicators, save them to CSV and plot them.

    Args:
        df: DataFrame holding at least the columns 'index_price',
            'Interest_rate', 'nav', 'rebalancing' and 'stoploss'.  Rows before
            the first row where 'rebalancing' == 1 are dropped.
        trading_days: Number of periods per year used for annualisation.
        required: Required return for the Sortino ratio; it is divided by
            ``trading_days`` before being compared with per-row returns.
        save_file: Label used in the plot titles.
        save_address: Target passed to ``DataFrame.to_csv``.
        whole: When 1, the maximum drawdown and its start/end labels are
            computed as well and the NAV panel is drawn.

    Returns:
        The DataFrame of indicators that was written to ``save_address``.

    NOTE(review): relies on legacy pandas/matplotlib APIs (``.ix``,
    ``pd.expanding_*``, ``hist(normed=...)``, ``mlab.normpdf``) -- confirm the
    pinned library versions.
    """
    # TODO:net_value has some problem.
    # columns needed
    col = ['index_price', 'Interest_rate', 'nav', 'rebalancing', 'stoploss']
    df_valid = df.ix[:, col]
    start_balance = df.index[df['rebalancing'] == 1][0]
    df_valid = df_valid[df_valid.index >= start_balance]

    # daily (log) return of nav
    df_valid['return'] = np.log(df['nav']) - np.log(df['nav'].shift(1))
    # benchmark_net_value: index price relative to its first retained value
    df_valid[
        'benchmark'] = df_valid['index_price'] / df_valid['index_price'].ix[0]
    # benchmark_return (simple return)
    df_valid['benchmark_return'] = (df_valid['benchmark']-
                                           df_valid['benchmark'].shift(1))/\
                                   df_valid['benchmark'].shift(1)
    # Annualized return
    df_valid['Annu_return'] = pd.expanding_mean(
        df_valid['return']) * trading_days
    # Volatility
    df_valid.loc[:, 'algo_volatility'] = pd.expanding_std(
        df_valid['return']) * np.sqrt(trading_days)
    # Excess return over the per-period interest rate and over the benchmark
    df_valid.loc[:, 'xret'] = df_valid[
        'return'] - df_valid['Interest_rate'] / trading_days / 100
    df_valid.loc[:, 'ex_return'] = df_valid['return'] - df_valid[
        'benchmark_return']

    def ratio(x):
        """Mean over standard deviation, ignoring NaNs."""
        return np.nanmean(x) / np.nanstd(x)

    # sharpe ratio
    df_valid.loc[:, 'sharpe'] = pd.expanding_apply(df_valid['xret'], ratio)\
                                * np.sqrt(trading_days)
    # information ratio
    df_valid.loc[:, 'IR'] = pd.expanding_apply(df_valid['ex_return'], ratio)\
                                * np.sqrt(trading_days)

    # Sortino ratio
    def modify_ratio(x, re):
        """Mean excess over ``re`` divided by the downside deviation below it."""
        re /= trading_days
        ret = np.nanmean(x) - re
        st_d = np.nansum(np.square(x[x < re] - re)) / x[x < re].size
        return ret / np.sqrt(st_d)

    df_valid.loc[:, 'sortino'] = pd.expanding_apply(
        df_valid['return'], modify_ratio,
        args=(required, )) * np.sqrt(trading_days)
    # Transfer infs to NA
    df_valid.loc[np.isinf(df_valid.loc[:, 'sharpe']), 'sharpe'] = np.nan
    df_valid.loc[np.isinf(df_valid.loc[:, 'IR']), 'IR'] = np.nan
    # hit_rate: share of rows so far whose return is >= the benchmark return
    wins = np.where(df_valid['return'] >= df_valid['benchmark_return'], 1.0,
                    0.0)
    df_valid.loc[:, 'hit_rate'] = wins.cumsum() / pd.expanding_apply(wins, len)
    # 95% VaR
    df_valid['VaR'] = -pd.expanding_quantile(df_valid['return'], 0.05)*\
                      np.sqrt(trading_days)
    # 95% CVaR: mean of the returns below the 5th percentile
    df_valid['CVaR'] = -pd.expanding_apply(df_valid['return'],
                                          lambda x: x[x < np.nanpercentile(x, 5)].mean())\
                       * np.sqrt(trading_days)

    if whole == 1:
        # max_drawdown
        def exp_diff(x, kind):
            """Distance of each value from the running maximum up to it.

            ``kind == 'dollar'`` gives the absolute difference, anything else
            the difference relative to the running maximum.
            """
            if kind == 'dollar':
                xret = pd.expanding_apply(x, lambda xx: (xx[-1] - xx.max()))
            else:
                xret = pd.expanding_apply(
                    x, lambda xx: (xx[-1] - xx.max()) / xx.max())
            return xret
    # dollar
    #     xret = exp_diff(df_valid['cum_profit'],'dollar')
    #     df_valid['max_drawdown_profit'] = abs(pd.expanding_min(xret))
    # percentage

        xret = exp_diff(df_valid['nav'], 'percentage')
        df_valid['max_drawdown_ret'] = abs(pd.expanding_min(xret))

        # max_drawdown_duration:
        # drawdown_enddate is the first time for restoring the max
        def drawdown_end(x, kind):
            """First label after the deepest drawdown where it is back to 0."""
            xret = exp_diff(x, kind)
            minloc = xret[xret == xret.min()].index[0]
            x_sub = xret[xret.index > minloc]
            # if never recovering,then return nan
            try:
                return x_sub[x_sub == 0].index[0]
            except IndexError:
                return np.nan

        def drawdown_start(x, kind):
            """Last label before the deepest drawdown where it was still 0."""
            xret = exp_diff(x, kind)
            minloc = xret[xret == xret.min()].index[0]
            x_sub = xret[xret.index < minloc]
            # no zero-drawdown row before the trough: return nan
            try:
                return x_sub[x_sub == 0].index[-1]
            except IndexError:
                return np.nan

        # Only the last row of these two columns is filled in.
        df_valid['max_drawdown_start'] = pd.Series()
        df_valid['max_drawdown_end'] = pd.Series()
        df_valid['max_drawdown_start'].ix[-1] = drawdown_start(
            df_valid['nav'], 'percentage')
        df_valid['max_drawdown_end'].ix[-1] = drawdown_end(
            df_valid['nav'], 'percentage')
    df_valid.to_csv(save_address)
    # =====result visualization=====
    plt.figure(1)
    if whole == 1:
        plt.subplot(224)
        plt.plot(df_valid['nav'], label='strategy')
        plt.plot(df_valid['benchmark'], label='S&P500')
    # NOTE(review): the labels below run even when whole != 1 and then apply
    # to whatever axes pyplot considers current -- confirm this is intended.
    plt.xlabel('Date')
    plt.legend(loc=0, shadow=True)
    plt.ylabel('Nav')
    plt.title('Nav of ' + save_file + ' & SP500')

    # plt.subplot(223)
    # plt.plot(df_valid['cum_profit'],label = 'strategy')
    # plt.xlabel('Date')
    # plt.ylabel('Cum_profit')
    # plt.title('Cum_profit of ' + save_file)

    plt.subplot(221)
    plt.plot(df_valid['return'], label='strategy')
    plt.xlabel('Date')
    plt.ylabel('Daily_return')
    plt.title('Daily Return of ' + save_file)

    plt.subplot(222)
    x_return = df_valid[df_valid['return'].notna()].loc[:, 'return']
    y_return = df_valid[
        df_valid['benchmark_return'].notna()].loc[:, 'benchmark_return']
    mu = x_return.mean()
    sigma = x_return.std()
    # 100 bin edges spanning +/- 3 standard deviations of the strategy return
    mybins = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
    count_x, _, _ = plt.hist(x_return,
                             mybins,
                             normed=1,
                             alpha=0.5,
                             label='strategy')
    count_y, _, _ = plt.hist(y_return,
                             mybins,
                             normed=1,
                             alpha=0.5,
                             label='S&P500')
    plt.ylabel('density')
    plt.xlabel('daily_return')
    plt.title('Histogram of Daily Return for ' + save_file + ' & SP500')
    plt.grid(True)
    # add normal distribution line
    y = mlab.normpdf(mybins, mu, sigma)
    plt.plot(mybins, y, 'r--', linewidth=1, label='Normal of strategy')
    plt.legend(loc=0, shadow=True)
    # plt.tight_layout()
    plt.show()
    return df_valid
Пример #29
0
# An expanding max walks the series from the first element and yields the
# largest value seen so far, e.g.
#   [1, 2, 1, 1, 500, 100] -> [1, 2, 2, 2, 500, 500]
# The method form is used so that this block relies on a single pandas window
# API (.rolling()/.expanding()) instead of mixing it with the removed
# pd.expanding_max / pd.expanding_min functions.
expan_max = df['close'].expanding().max()
expan_min = df['close'].expanding().min()

# Highest high of the last N1 rows; the NaNs of the first N1-1 rows are
# replaced by the running max of the close.  The result is assigned back
# explicitly: an in-place fillna on a column selection is not guaranteed to
# reach the frame.
df['n1_high'] = df['high'].rolling(window=N1, center=False).max().fillna(expan_max)

# Lowest low of the last N2 rows, warm-up rows filled with the running min of
# the close.
df['n2_low'] = df['low'].rolling(window=N2, center=False).min().fillna(expan_min)

# Series for the short side
df['n1_low'] = df['low'].rolling(window=N1, center=False).min().fillna(expan_min)

df['n2_high'] = df['high'].rolling(window=N2, center=False).max().fillna(expan_max)


# Next, build the signal column from the definition of a breakout: the close
# moving beyond the N-day high or low; exceeding the high is the buy signal.
# buy_index_in holds the index labels (dates in this example) of the rows whose
# close is above the previous row's n1_high (shift(1) moves the whole n1_high
# column down by one row).
buy_index_in = df[df['close'] > df['n1_high'].shift(1)].index
Пример #30
0
# 2014-05-28  210.24  212.77  205.26    -0.624  210.02     211.56  5496278
# 2014-05-29  210.24  212.49  207.72     0.000  210.57     210.24  3694596
# 2014-05-30  207.77  214.80  207.02    -1.175  210.30     210.24  5586068
# 2014-06-02  204.70  209.35  201.67    -1.478  207.33     207.77  4668115
# 2014-06-03  204.94  208.00  202.59     0.117  203.49     204.70  3866182
#
#                 date  date_week   atr21   atr14  key  n1_high
# 2014-05-28  20140528          2  7.5100  7.5100    0   210.24
# 2014-05-29  20140529          3  6.0748  6.0421    1   210.24
# 2014-05-30  20140530          4  6.6981  6.7060    2   210.24
# 2014-06-02  20140602          0  7.2350  7.2763    3   210.24
# 2014-06-03  20140603          1  6.7973  6.7894    4   210.24
# Build the sell level n2_low (lowest price of the last N2 days) the same way
# as n1_high; rolling_min() mirrors rolling_max().  Warm-up rows without a full
# window take the running minimum of the close instead.
expan_min = pd.expanding_min(tsla_df['close'])
tsla_df['n2_low'] = pd.rolling_min(tsla_df['low'], window=N2).fillna(expan_min)

# Build the signal column from the definition of a breakout.
# Buy (signal = 1): today's close is above the N1-day high as of yesterday.
prev_n1_high = tsla_df['n1_high'].shift(1)
buy_index = tsla_df[tsla_df['close'] > prev_n1_high].index
tsla_df.loc[buy_index, 'signal'] = 1

# Sell (signal = 0): today's close is below the N2-day low as of yesterday.
prev_n2_low = tsla_df['n2_low'].shift(1)
sell_index = tsla_df[tsla_df['close'] < prev_n2_low].index
tsla_df.loc[sell_index, 'signal'] = 0

# Pie chart of how often each signal value occurs over the whole period
# (counts from the original run are listed below).
signal_counts = tsla_df['signal'].value_counts()
signal_counts.plot(kind='pie', figsize=(5, 5))
# 1.0    54
# 0.0    53
# Name: signal, dtype: int64
Пример #31
0
    def analyse(self):
        """Compute summary statistics for this strategy and store them.

        Reads the strategy's trades with ``self.analyseStrategySql``, keeps
        them (plus running Sum/Max/Min/DD columns) in ``self._strategyData``,
        logs the summary and replaces the strategy's stored row using
        ``self.deleteStrategySql`` and ``self.insertStrategySql``.

        Both database connections are closed in ``finally`` blocks so that a
        failing query or statement does not leak the connection.

        NOTE(review): the SQL statements are assembled with ``%`` formatting;
        this presumes the rule/exit/type values are trusted internal
        identifiers -- confirm.
        """
        connection = sqlite3.connect(pyswing.database.pySwingDatabase)
        try:
            query = self.analyseStrategySql % (self._rule1, self._rule2,
                                               self._rule3, self._exit,
                                               self._type)
            self._strategyData = read_sql_query(query, connection, 'Date')
        finally:
            connection.close()

        # A fixed cost of 0.2 is deducted from every exit value.
        self._strategyData[
            'ExitValueAfterCosts'] = self._strategyData['ExitValue'] - 0.2
        exitValueDataFrame = self._strategyData['ExitValueAfterCosts']

        mean = exitValueDataFrame.mean()
        median = exitValueDataFrame.median()
        total = exitValueDataFrame.sum()
        count = exitValueDataFrame.count()

        # NOTE(review): the divisor 10 presumably reflects ten years of data
        # -- confirm.
        tradesPerYear = count / 10
        sharpeRatio = sqrt(tradesPerYear) * exitValueDataFrame.mean(
        ) / exitValueDataFrame.std()

        self._strategyData["Sum"] = expanding_sum(exitValueDataFrame)
        self._strategyData["Max"] = expanding_max(self._strategyData["Sum"])
        self._strategyData["Min"] = expanding_min(self._strategyData["Sum"])
        self._strategyData[
            "DD"] = self._strategyData["Max"] - self._strategyData["Min"]

        # Draw-down: most negative gap between the running sum and its
        # running maximum.
        runningSum = expanding_sum(exitValueDataFrame)
        max2here = expanding_max(runningSum)
        dd2here = runningSum - max2here
        drawDown = dd2here.min()

        Logger.log(
            logging.INFO, "Analysing Strategy", {
                "scope": __name__,
                "Rule 1": self._rule1,
                "Rule 2": self._rule2,
                "Rule 3": self._rule3,
                "Exit": self._exit,
                "Type": self._type,
                "Mean": str(mean),
                "Median": str(median),
                "Sum": str(total),
                "Count": str(count),
                "SharpeRatio": str(sharpeRatio),
                "DrawDown": str(drawDown)
            })

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)
        try:
            c = connection.cursor()
            try:
                # Replace any previously stored row for this strategy.
                deleteSql = self.deleteStrategySql % (
                    pyswing.globals.pySwingStrategy, self._rule1, self._rule2,
                    self._rule3, self._exit, self._type)
                c.executescript(deleteSql)
                connection.commit()

                insertSql = self.insertStrategySql % (
                    pyswing.globals.pySwingStrategy, self._rule1, self._rule2,
                    self._rule3, self._exit, self._type, str(mean),
                    str(median), str(total), str(count), str(sharpeRatio),
                    str(drawDown))
                c.executescript(insertSql)
                connection.commit()
            finally:
                c.close()
        finally:
            connection.close()
Пример #32
0
if __name__ == '__main__':
    kl_pd = ABuSymbolPd.make_kl_df('TSLA', n_folds=2)
    # 1. N-day trend breakout: buy when the price exceeds the highest price of
    #    the last N1 days, sell when it falls below the lowest price of the
    #    last N2 days.
    N1, N2 = 42, 21

    # 2.1 expanding_max walks the series and returns, at each position, the
    #     largest element seen so far; here it is the running max of the close.
    expan_max = pd.expanding_max(kl_pd['close'])
    # 2.2 rolling_max finds the maximum inside a window of N1 rows, which
    #     leaves the first N1-1 rows as NaN; those are replaced by the running
    #     max of the close over the first k days, k in [0, N1].
    n1_high = pd.rolling_max(kl_pd['high'], window=N1)
    kl_pd['n1_high'] = n1_high.fillna(expan_max)
    # 2.3 Same idea for the minimum.
    expan_min = pd.expanding_min(kl_pd['close'])
    n2_low = pd.rolling_min(kl_pd['low'], window=N2)
    kl_pd['n2_low'] = n2_low.fillna(expan_min)
    print(kl_pd.head())

    # 3.1 Derive the buy/sell signal series from n1_high and n2_low.
    #     Note: today's close is compared with the n1 value up to yesterday,
    #     not including today, because today's close can never be above
    #     today's high.
    previous_high = kl_pd.n1_high.shift(1)
    buy_signal = kl_pd[kl_pd.close > previous_high].index
    kl_pd.loc[buy_signal, 'signal'] = 1
    # 3.2 The sell signal from n2_low works the same way.
    previous_low = kl_pd.n2_low.shift(1)
    sell_signal = kl_pd[kl_pd.close < previous_low].index
    kl_pd.loc[sell_signal, 'signal'] = 0
    # 3.3 NaN rows need no special handling here.
    signal_counts = kl_pd.signal.value_counts()
    signal_counts.plot(kind='pie')
    plt.show()

    # 4.1 将买入卖出的信号转化为持股的状态
Пример #33
0
def longsklearn(code='999999', ptype='f', dtype='d', start=None, end=None):
    """Plot close prices with rolling/expanding envelopes in a zoomable figure.

    Bars are loaded with ``tdd.get_tdx_append_now_df(code, ptype, start, end)``
    and a second frame ``dw`` is derived from them with
    ``tdd.get_tdx_stock_period_to_type(df, dtype)`` (presumably a conversion
    to the ``dtype`` period -- confirm against ``tdd``).  The function prints
    a few diagnostics, draws the chart and then calls ``show()``; it returns
    ``None``.

    Side effect: a ``'mean'`` column, (high + low) / 2, is added to ``df``.
    """
    df = tdd.get_tdx_append_now_df(code, ptype, start,
                                   end).sort_index(ascending=True)
    dw = tdd.get_tdx_stock_period_to_type(df, dtype).sort_index(ascending=True)
    h = df.loc[:, ['open', 'close', 'high', 'low']]
    highp = h['high'].values
    lowp = h['low'].values
    closep = h['close'].values

    # Linear trend of the close against bar position, extended 200 points
    # ahead.  It is not drawn at present; the fit is kept so that fitting
    # errors surface exactly as before.  (The stray, discarded
    # ``LinearRegression(..., normalize=False)`` expression that followed the
    # fit was removed: it had no effect and its ``normalize`` argument is
    # rejected by newer scikit-learn.)
    lr = LinearRegression()
    x = np.atleast_2d(np.linspace(0, len(closep), len(closep))).T
    lr.fit(x, closep)
    xt = np.atleast_2d(np.linspace(0, len(closep) + 200, len(closep) + 200)).T
    yt = lr.predict(xt)

    # Local troughs of the low (bP/bV) and local peaks of the high (uP/uV):
    # positions and values of interior bars that are <= (>=) the previous bar
    # and strictly below (above) the next one.
    interior = range(1, len(highp) - 1)
    bP = [i for i in interior
          if lowp[i] <= lowp[i - 1] and lowp[i] < lowp[i + 1]]
    bV = [lowp[i] for i in bP]
    uP = [i for i in interior
          if highp[i] >= highp[i - 1] and highp[i] > highp[i + 1]]
    uV = [highp[i] for i in uP]
    print(highp)
    print("uV:%s" % uV[:1])
    print("uP:%s" % uP[:1])
    print("bV:%s" % bV[:1])
    print("bP:%s" % bP[:1])

    # LIS returns (values, positions-into-its-input); map the positions back
    # to bar positions through uP / bP.
    sV, sP = LIS(uV)
    dV, dP = LIS(bV)
    print("sV:%s" % sV[:1])
    print("sP:%s" % sP[:1])
    print("dV:%s" % dV[:1])
    print("dP:%s" % dP[:1])
    sidx = [uP[pos] for pos in sP]
    didx = [bP[pos] for pos in dP]

    print("sidx:%s" % sidx[:1])
    print("didx:%s" % didx[:1])

    # Trend through the selected peaks (also not drawn at present).
    lr = LinearRegression()
    X = np.atleast_2d(np.array(sidx)).T
    Y = np.array(sV)
    lr.fit(X, Y)
    estV = lr.predict(xt)

    fig = plt.figure(figsize=(16, 10), dpi=72)
    plt.subplots_adjust(left=0.05,
                        bottom=0.08,
                        right=0.95,
                        top=0.95,
                        wspace=0.15,
                        hspace=0.25)
    ax = fig.add_subplot(111)
    plt.grid(True)
    ax = h['close'].plot()

    df['mean'] = [(hi + lo) / 2
                  for hi, lo in zip(df.high.values, df.low.values)]
    print(df['mean'][:1])
    dw = dw.set_index('date')

    # NOTE(review): pd.rolling_* / pd.expanding_* are legacy pandas functions
    # -- confirm the pinned pandas version.  The second positional argument of
    # the expanding_* calls is min_periods.
    ax.plot(df.index, pd.rolling_mean(df['mean'], 60), 'g', linewidth=1)
    ax.plot(dw.index, pd.rolling_mean(dw.close, 5), 'r', linewidth=1)
    ax.plot(dw.index, pd.rolling_min(dw.close, 5), 'bo')
    ax.plot(dw.index, pd.rolling_max(dw.close, 5), 'yo')
    ax.plot(dw.index, pd.expanding_max(dw.close, 5), 'ro')
    ax.plot(dw.index, pd.expanding_min(dw.close, 5), 'go')

    # Install zoom and pan handlers on the axes; the returned callbacks are
    # kept referenced until show() returns.
    scale = 1.1
    zp = zoompan.ZoomPan()
    figZoom = zp.zoom_factory(ax, base_scale=scale)
    figPan = zp.pan_factory(ax)
    show()
# ========== 根据上一步得到的代码列表,遍历所有股票,将这些股票合并到一张表格all_stock中
all_stock = pd.DataFrame()

# 遍历每个创业板的股票
for code in stock_code_list:
    print code

    # 从csv文件中读取该股票数据
    stock_data = pd.read_csv('trading-data@full/stock data/' + code + '.csv',
                             parse_dates=[1])# 注意:这里请填写数据文件在您电脑中的路径
    stock_data.sort('date', inplace=True)# 对数据按照【date】交易日期从小到大排序

    # 计算KDJ指标
    low_list = pd.rolling_min(stock_data['low'], 9)
    low_list.fillna(value=pd.expanding_min(stock_data['low']), inplace=True)
    high_list = pd.rolling_max(stock_data['high'], 9)
    high_list.fillna(value=pd.expanding_max(stock_data['high']), inplace=True)
    rsv = (stock_data['close'] - low_list) / (high_list - low_list) * 100
    stock_data['KDJ_K'] = pd.ewma(rsv, com=2)
    stock_data['KDJ_D'] = pd.ewma(stock_data['KDJ_K'], com=2)
    stock_data['KDJ_J'] = 3 * stock_data['KDJ_K'] - 2 * stock_data['KDJ_D']
    # 计算KDJ指标金叉、死叉情况
    stock_data['KDJ_金叉死叉'] = ''
    kdj_position = stock_data['KDJ_K'] > stock_data['KDJ_D']
    stock_data.loc[kdj_position[(kdj_position == True) & (kdj_position.shift() == False)].index, 'KDJ_金叉死叉'] = '金叉'
    stock_data.loc[kdj_position[(kdj_position == False) & (kdj_position.shift() == True)].index, 'KDJ_金叉死叉'] = '死叉'

    # 通过复权价格计算接下来几个交易日的收益率
    for n in [1, 2, 3, 5, 10, 20]:
        stock_data['接下来'+str(n)+'个交易日涨跌幅'] = stock_data['adjust_price'].shift(-1*n) / stock_data['adjust_price'] - 1.0