def fill_daily_k_at_suspension_days(self, begin_date=None, end_date=None):
    """
    Fill daily K records for suspension days in all three daily collections.

    The stock list is read from the ``basic`` collection for yesterday's
    calendar date; the collections ``daily``, ``daily_hfq`` and ``daily_qfq``
    are then processed one after another for the requested trading dates.

    :param begin_date: start of the date range, forwarded to get_trading_dates
    :param end_date: end of the date range, forwarded to get_trading_dates
    :return: None
    """
    # NOTE(review): "last trading date" is simply yesterday's calendar date;
    # whether it is an actual trading day is not checked here.
    yesterday = datetime.now() - timedelta(days=1)
    last_trading_date = yesterday.strftime('%Y-%m-%d')

    # Only the code and timeToMarket fields of each stock are fetched.
    wanted_fields = {'code': True, 'timeToMarket': True, '_id': False}
    basics = list(DB_CONN['basic'].find(
        {'date': last_trading_date},
        projection=wanted_fields,
        batch_size=5000))
    print(basics)

    trading_dates = get_trading_dates(begin_date, end_date)
    for collection_name in ('daily', 'daily_hfq', 'daily_qfq'):
        self.fill_daily_k_at_suspension_days_at_date_one_collection(
            basics, trading_dates, collection_name)
def fill_is_trading_between(self, begin_date=None, end_date=None):
    """
    Fill the is_trading field for every trading date in the given period.

    For each date returned by get_trading_dates, the ``daily``,
    ``daily_hfq`` and ``daily_qfq`` collections are processed in that order.

    :param begin_date: start date
    :param end_date: end date
    """
    for trading_date in get_trading_dates(begin_date, end_date):
        for collection_name in ('daily', 'daily_hfq', 'daily_qfq'):
            self.fill_single_date_is_trading(trading_date, collection_name)
def fill_is_trading(self, date=None):
    """
    Add the is_trading field to the daily data (True = traded, False = suspended).

    Background, per the original notes: data from Tushare carries neither a
    trading status nor K data for suspension days, so K data for suspension
    days is filled in for convenience. Once that is done the database holds
    both suspended and traded records, and this field tells them apart. The
    worst case to keep in mind is a database that already contains both kinds
    of records but has no is_trading field; the original notes state that
    suspension is then detected by a trading volume of 0 (that check lives in
    fill_single_date_is_trading, not in this method).

    :param date: a single date to process; when None, every date returned by
        get_trading_dates() is processed
    """
    target_dates = get_trading_dates() if date is None else [date]
    for one_date in target_dates:
        for collection_name in ('daily', 'daily_hfq', 'daily_qfq'):
            self.fill_single_date_is_trading(one_date, collection_name)
def crawl_basic(self, begin_date=None, end_date=None):
    """
    Crawl basic stock information for every trading date in a range.

    A failure on one date is reported and the crawl continues with the next
    date (best effort).

    :param begin_date: start date; defaults to yesterday when None
    :param end_date: end date; defaults to yesterday when None
    """
    if begin_date is None or end_date is None:
        # Compute the default once so both ends agree even if the call
        # happens to straddle midnight.
        yesterday = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
        if begin_date is None:
            begin_date = yesterday
        if end_date is None:
            end_date = yesterday

    for date in get_trading_dates(begin_date, end_date):
        try:
            self.crawl_basic_at_date(date)
        except Exception as exc:
            # Catch Exception rather than everything, so Ctrl-C and
            # SystemExit still stop a long crawl; also surface the cause.
            print('抓取股票基本信息时出错,日期:%s' % date, flush=True)
            print(repr(exc), flush=True)
def compute(self, begin_date=None, end_date=None):
    """
    Compute this factor for all stocks over a period and save it to the database.

    For each trading date the value stored under ``self.name`` is
    ``round(close * totals, 2)`` (per the original comments: share price times
    total share capital, i.e. total market value). Results are upserted into
    ``self.collection`` keyed by code and date.

    :param begin_date: start date; defaults to today when None
    :param end_date: end date; defaults to today when None
    """
    dm = DataModule()

    # Without an explicit range, compute the current day only. The default is
    # taken once so both ends of the range are guaranteed to match.
    today = datetime.now().strftime('%Y-%m-%d')
    if begin_date is None:
        begin_date = today
    if end_date is None:
        end_date = today

    for date in get_trading_dates(begin_date, end_date):
        # Basic information (including total shares) on this trading date
        df_basics = dm.get_stock_basic_at(date)
        if df_basics.index.size == 0:
            continue
        # Index by code. `drop` is passed by keyword: pandas >= 2.0 rejects
        # the positional form the original used.
        df_basics.set_index(['code'], drop=True, inplace=True)

        # Unadjusted daily K data (including close) on this trading date
        df_dailies = dm.get_one_day_k_data(autype=None, date=date)
        if df_dailies.index.size == 0:
            continue
        df_dailies.set_index(['code'], drop=True, inplace=True)

        update_requests = []
        for code in df_dailies.index:
            try:
                # Share price
                close = df_dailies.loc[code]['close']
                # Total shares
                total_shares = df_basics.loc[code]['totals']
                # Total market value = share price * total shares
                total_capital = round(close * total_shares, 2)

                print('%s, %s, mkt_cap: %15.2f' % (code, date, total_capital),
                      flush=True)

                update_requests.append(
                    UpdateOne(
                        {'code': code, 'date': date},
                        {'$set': {'code': code, 'date': date,
                                  self.name: total_capital}},
                        upsert=True))
            except Exception:
                # Best effort per stock (e.g. code missing from df_basics),
                # but do not swallow KeyboardInterrupt/SystemExit.
                print('计算规模因子时发生异常,股票代码:%s,日期:%s' % (code, date),
                      flush=True)

        if update_requests:
            save_result = self.collection.bulk_write(update_requests,
                                                     ordered=False)
            # NOTE(review): `code` here is the last code of the loop above,
            # while the write covers every stock of this date.
            print('股票代码: %s, 因子: %s, 插入:%4d, 更新: %4d' %
                  (code, self.name, save_result.upserted_count,
                   save_result.modified_count),
                  flush=True)
def compute(self, begin_date, end_date):
    """
    Compute the factor for every stock and trading date in the range and save it.

    For each (code, date) the most recently announced report whose
    ``report_date`` ends in ``-12-31`` and whose ``announced_date`` is not
    after the date is looked up in ``CWBB_LRB`` and ``CWBB_ZCFZB``. The stored
    value is ``round(parentnetprofit / sumasset, 2)`` under the key ``roe``.
    Dates with no matching report, or with a missing or zero ``sumasset``,
    are skipped.

    :param begin_date: start date
    :param end_date: end date
    """
    codes = get_all_codes()
    all_dates = get_trading_dates(begin_date=begin_date, end_date=end_date)

    newest_first = [('announced_date', DESCENDING)]

    for code in codes:
        update_requests = []
        for date in all_dates:
            # Same filter for both statements: reports with a 12-31 report
            # date that were announced on or before `date`.
            report_filter = {
                'code': code,
                'announced_date': {'$lte': date},
                # Raw string: '\d' in a normal literal is an invalid escape.
                'report_date': {'$regex': r'\d{4}-12-31$'},
            }

            lrb = DB_CONN['CWBB_LRB'].find_one(
                report_filter,
                sort=newest_first,
                projection={'parentnetprofit': True})
            # Skip when there is no income statement information
            if lrb is None:
                continue

            zcfzb = DB_CONN['CWBB_ZCFZB'].find_one(
                report_filter,
                sort=newest_first,
                projection={'sumasset': True})
            if zcfzb is None:
                continue

            net_profit = lrb.get('parentnetprofit')
            total_assets = zcfzb.get('sumasset')
            # A missing field or a zero denominator must not abort the run
            # for all remaining stocks; skip just this date.
            if net_profit is None or not total_assets:
                continue

            improved_roe = round(net_profit / total_assets, 2)

            update_requests.append(
                UpdateOne(
                    {'code': code, 'date': date},
                    {'$set': {'code': code, 'date': date,
                              'roe': improved_roe}},
                    upsert=True))

        if update_requests:
            save_result = self.collection.bulk_write(update_requests,
                                                     ordered=False)
            print('股票代码: %s, 因子: %s, 插入:%4d, 更新: %4d' %
                  (code, self.name, save_result.upserted_count,
                   save_result.modified_count),
                  flush=True)
def start(self):
    """
    Run the strategy backtest.

    Walks the trading calendar day by day. At each day's open it executes the
    sells, position additions and buys that were decided on the previous day,
    then evaluates rebalance / sell / stop-loss / stop-profit / add-position /
    buy signals to decide the next day's orders. When finished it prints the
    profit curve (with the CSI 300 index '000300' as benchmark), the
    annualized return, the maximum drawdown and the Sharpe ratio, and shows a
    plot.
    """
    # Initial total capital
    initial_capital = self.strategy_option.capital()
    # Initial cash
    cash = initial_capital
    # Position cap for a single stock
    single_position = self.strategy_option.single_position()

    # Stock pool taken from the strategy configuration
    stock_pool = self.strategy_option.stock_pool()

    # Account object that tracks the held stocks
    account = Account()

    # Sell signal
    sell_signal = self.strategy_option.sell_signal(account)
    # Buy signal
    buy_signal = self.strategy_option.buy_signal(account)

    # Net value, profit and the benchmark over the same period, keyed by date
    df_profit = DataFrame(columns=['net_value', 'profit', 'hs300'])

    # Trading calendar
    all_dates = get_trading_dates(begin_date=self.begin_date,
                                  end_date=self.end_date)

    # Benchmark close on the first day of the period
    hs300_k = self.dm.get_k_data('000300', index=True,
                                 begin_date=all_dates[0],
                                 end_date=all_dates[0])
    hs300_begin_value = hs300_k.loc[hs300_k.index[0]]['close']

    # Stock pool data: rebalance dates and the codes selected on each of them
    rebalance_dates, date_codes_dict = stock_pool.get_option_stocks()

    # Stop-loss policy
    stop_loss_policy = self.strategy_option.get_stop_loss(account)
    # Stop-profit policy
    stop_profit_policy = self.strategy_option.get_stop_profit(account)
    # Add-position policy
    add_position_policy = self.strategy_option.get_add_position(account)

    # Collect every code that appears in the pool at any rebalance date
    all_option_code_set = set()
    for rebalance_date in rebalance_dates:
        for code in date_codes_dict[rebalance_date]:
            all_option_code_set.add(code)

    # Cache the unadjusted daily data of each of those stocks, indexed by date
    for code in all_option_code_set:
        dailies_df = self.dm.get_k_data(code, autype=None,
                                        begin_date=self.begin_date,
                                        end_date=self.end_date)
        dailies_df.set_index(['date'], inplace=True)
        self.code_daily_cache[code] = dailies_df

    last_phase_codes = None
    this_phase_codes = None
    to_be_sold_codes = set()
    to_be_bought_codes = set()
    last_date = None
    # Pending position additions: signal per code, and the set of codes
    to_be_added_signals = dict()
    to_be_added_codes = set()

    # Backtest step by step, one date at a time
    for _date in all_dates:
        self.logger.info('开始回测,日期:' + _date)

        # Handle price adjustment of the holdings at the open
        account.adjust_holding_volume_at_open(last_date, _date)

        # Sell
        if len(to_be_sold_codes) > 0:
            # Iterate over a copy because the set is modified inside the loop
            sold_codes_tmp = set(to_be_sold_codes)
            for code in sold_codes_tmp:
                try:
                    if code in account.holding_codes:
                        holding_stock = account.get_holding(code)
                        holding_volume = holding_stock['volume']
                        sell_price = self.code_daily_cache[code].loc[
                            _date]['open']
                        low_limit = self.code_daily_cache[code].loc[_date][
                            'low_limit']
                        # Sell at the open only if it is above the limit-down price
                        if sell_price > low_limit:
                            sell_amount = holding_volume * sell_price
                            cash += sell_amount

                            cost = holding_stock['cost']
                            # Profit of this position, in percent of its cost
                            single_profit = (sell_amount - cost) * 100 / cost
                            print('卖出 %s, %6d, %6.2f, %8.2f, %4.2f' %
                                  (code, holding_volume, sell_price,
                                   sell_amount, single_profit))
                        else:
                            print(
                                '当日跌停,无法卖出,股票代码:%s, 日期: %s,价格:%7.2f,跌停价:%7.2f'
                                % (code, _date, sell_price, low_limit),
                                flush=True)

                        # NOTE(review): the two statements below follow the
                        # whole if/else, so the holding is dropped and the
                        # code leaves the pending list even when the
                        # limit-down branch reported that it could not be
                        # sold (and no cash was credited). Confirm the
                        # intended nesting level.
                        # Remove from the holdings
                        account.sell_out(code)
                        # Remove from the pending-sell list
                        to_be_sold_codes.remove(code)
                except:
                    print('卖出时,发生异常:%s, %s' % (code, _date), flush=True)

            print('卖出后,现金: %10.2f' % cash)

        # Add-position logic
        add_codes_tmp = set(to_be_added_codes)
        for code in add_codes_tmp:
            add_signal = to_be_added_signals[code]
            try:
                # Without enough cash the signal simply stays pending
                if cash > add_signal['position']:
                    daily = self.code_daily_cache[code].loc[_date]
                    buy_price = daily['open']
                    high_limit = daily['high_limit']
                    # Buy at the open only if it is below the limit-up price
                    if buy_price < high_limit:
                        # Round the share count down to a multiple of 100
                        volume = int(
                            int(add_signal['position'] / buy_price) /
                            100) * 100
                        buy_amount = buy_price * volume
                        cash -= buy_amount
                        print('加仓 %s, %6d, %6.2f, %8.2f' %
                              (code, volume, buy_price, buy_amount),
                              flush=True)

                        # Update the holding after the addition
                        holding = account.get_holding(code)
                        holding['cost'] += buy_amount
                        holding['last_value'] += buy_amount
                        holding['volume'] += volume
                        holding['add_times'] += 1
                        holding['last_buy_hfq_price'] = buy_price * daily[
                            'au_factor']
                        account.update_holding(code, holding)

                        # Remove from the pending-add list
                        to_be_added_codes.remove(code)
                        del to_be_added_signals[code]
                    else:
                        print(
                            '当日涨停,无法加仓,股票代码:%s, 日期: %s,价格:%7.2f,涨停价:%7.2f'
                            % (code, _date, buy_price, high_limit),
                            flush=True)
            except:
                print('加仓时,发生错误:%s, %s' % (code, _date), flush=True)

        # Buy
        if len(to_be_bought_codes) > 0:
            # Process the codes in sorted order
            sorted_to_be_bought_list = list(to_be_bought_codes)
            sorted_to_be_bought_list.sort()
            for code in sorted_to_be_bought_list:
                try:
                    if cash > single_position:
                        daily = self.code_daily_cache[code].loc[_date]
                        buy_price = daily['open']
                        high_limit = daily['high_limit']
                        # Buy at the open only if it is below the limit-up price
                        if buy_price < high_limit:
                            # Round the share count down to a multiple of 100
                            volume = int(
                                int(single_position / buy_price) / 100) * 100
                            buy_amount = buy_price * volume
                            cash -= buy_amount
                            print('买入 %s, %6d, %6.2f, %8.2f' %
                                  (code, volume, buy_price, buy_amount),
                                  flush=True)

                            # Maintain the holdings of the account
                            account.buy_in(code, volume=volume,
                                           cost=buy_amount)

                            # If there is an add-position policy, update the holding
                            if add_position_policy is not None:
                                holding = account.get_holding(code)
                                holding[
                                    'last_buy_hfq_price'] = buy_price * daily[
                                        'au_factor']
                                add_position_policy.update_holding(
                                    code, _date, holding)
                        else:
                            print(
                                '当日涨停,无法买入,股票代码:%s, 日期: %s,价格:%7.2f,涨停价:%7.2f'
                                % (code, _date, buy_price, high_limit),
                                flush=True)
                except:
                    print('买入时,发生错误:%s, %s' % (code, _date), flush=True)

            print('买入后,现金: %10.2f' % cash)

        # Codes currently held
        holding_codes = account.holding_codes

        # On a rebalance date, switch to the new period's stock list
        if _date in rebalance_dates:
            # Keep the current list as the previous period's list
            if this_phase_codes is not None:
                last_phase_codes = this_phase_codes
            this_phase_codes = date_codes_dict[_date]

            # Stocks dropped from the pool are sold at the next day's open
            if last_phase_codes is not None:
                out_codes = self.find_out_stocks(last_phase_codes,
                                                 this_phase_codes)
                for out_code in out_codes:
                    if out_code in holding_codes:
                        to_be_sold_codes.add(out_code)

        # Check for holdings that must be sold the next day
        for holding_code in holding_codes:
            if sell_signal.is_match(holding_code, _date):
                to_be_sold_codes.add(holding_code)

        # Check stop-loss signals
        if stop_loss_policy is not None:
            for holding_code in holding_codes:
                if stop_loss_policy.is_stop(holding_code, _date):
                    to_be_sold_codes.add(holding_code)
                    print('止损,股票:%s' % holding_code, flush=True)
                else:
                    stop_loss_policy.update_holding(holding_code, _date)

        # Check stop-profit signals
        if stop_profit_policy is not None:
            for holding_code in holding_codes:
                if stop_profit_policy.is_stop(holding_code, _date):
                    to_be_sold_codes.add(holding_code)
                    print('止盈,股票:%s' % holding_code, flush=True)
                else:
                    stop_profit_policy.update_holding(holding_code, _date)

        print('待卖股票,日期:%s,代码列表:' % _date, to_be_sold_codes, flush=True)

        # Check for holdings whose position should be increased
        if add_position_policy is not None:
            for holding_code in holding_codes:
                add_signal = add_position_policy.get_add_signal(
                    holding_code, _date)
                if add_signal is not None:
                    to_be_added_signals[holding_code] = add_signal
                    to_be_added_codes.add(holding_code)

        # Check for stocks to buy the next day; buy candidates that were not
        # bought today are discarded and re-evaluated
        to_be_bought_codes.clear()
        if this_phase_codes is not None:
            for _code in this_phase_codes:
                if _code not in holding_codes and \
                        buy_signal.is_match(_code, _date):
                    to_be_bought_codes.add(_code)

        self.logger.info('待买股票,日期:%s,代码列表:%s', _date, to_be_bought_codes)
        self.candidatesLogger.info('待买股票,日期:%s,代码列表:%s', _date,
                                   to_be_bought_codes)

        # Total market value of the holdings
        total_value = account.get_total_value(_date)
        # Total assets
        total_capital = total_value + cash

        print('收盘后,现金: %10.2f, 总资产: %10.2f' % (cash, total_capital))

        # Benchmark close on the current date, used for its growth
        hs300_k_current = self.dm.get_k_data('000300', index=True,
                                             begin_date=_date,
                                             end_date=_date)
        hs300_current_value = hs300_k_current.loc[
            hs300_k_current.index[0]]['close']

        last_date = _date
        # profit and hs300 are percentages relative to the starting values
        df_profit.loc[_date] = {
            'net_value':
            round(total_capital / initial_capital, 2),
            'profit':
            round(
                100 * (total_capital - initial_capital) / initial_capital,
                2),
            'hs300':
            round(
                100 * (hs300_current_value - hs300_begin_value) /
                hs300_begin_value, 2)
        }

    # Print the values of the backtest profit curve
    print('Profit history start')
    for index_date in df_profit.index:
        print('%s, %6.2f, %6.2f' %
              (index_date, df_profit.loc[index_date]['profit'],
               df_profit.loc[index_date]['hs300']),
              flush=True)
    print('Profit history end')

    drawdown = self.compute_drawdown(df_profit['net_value'])
    annual_profit, sharpe_ratio = self.compute_sharpe_ratio(
        df_profit['net_value'])

    print('回测结果 %s - %s,年化收益: %7.3f, 最大回撤:%7.3f, 夏普比率:%4.2f' %
          (self.begin_date, self.end_date, annual_profit, drawdown,
           sharpe_ratio))

    df_profit.plot(title='Backtest Result', y=['profit', 'hs300'],
                   kind='line')
    plt.show()
def get_option_stocks(self):
    """
    Stock-pool selection: find the candidate stocks for each rebalance date.

    Every ``self.interval`` trading days, stocks with 0 < PE < 30 are sorted
    by PE ascending and the first 100 are taken as candidates. Stocks from
    the previous period's pool that are not trading on the rebalance date are
    kept in the new pool; the remaining slots (up to 100 in total) are filled
    from the candidates.

    :return: tuple of (list of rebalance dates,
             dict mapping rebalance date -> list of stock codes of that period)
    """
    factor_module = FactorModule()
    dm = DataModule()

    # Trading calendar of the date range
    all_dates = get_trading_dates(self.begin_date, self.end_date)

    # Cache: code -> set of dates on which the stock actually traded
    code_dates_cache = dict()

    # Rebalance dates and the stocks chosen on each of them
    rebalance_date_codes_dict = dict()
    rebalance_dates = []

    # Pool of the previous period
    last_phase_codes = []

    pool_size = 100

    # Step through the calendar using the rebalance interval
    for index in range(0, len(all_dates), self.interval):
        # Current rebalance date
        rebalance_date = all_dates[index]

        # Candidates of this period: 0 < PE < 30, smallest PE first
        df_pe = factor_module.get_single_date_factors('pe', rebalance_date)
        df_pe.sort_values('pe', ascending=True, inplace=True)
        df_pe = df_pe[(0 < df_pe['pe']) & (df_pe['pe'] < 30)]
        df_pe.set_index(['code'], inplace=True)
        this_phase_option_codes = list(df_pe.index)[0:pool_size]
        print(this_phase_option_codes, flush=True)

        # Codes selected for this period
        this_phase_codes = []

        # Keep stocks from the previous pool that are suspended today
        if last_phase_codes:
            for code in last_phase_codes:
                if code not in code_dates_cache:
                    daily_ks = dm.get_k_data(code, autype=None,
                                             begin_date=self.begin_date,
                                             end_date=self.end_date)
                    daily_ks.set_index(['date'], inplace=True)
                    # Keep trading days only
                    daily_ks = daily_ks[daily_ks['is_trading']]
                    code_dates_cache[code] = set(daily_ks.index)
                # The original condition ended in `or code`, which is always
                # true for a non-empty code and carried over every
                # previous-period stock. Only suspended stocks are retained.
                if rebalance_date not in code_dates_cache[code]:
                    this_phase_codes.append(code)

            print('上期停牌的股票:', flush=True)
            print(this_phase_codes, flush=True)

        # Fill the remaining slots from the current candidates; slicing past
        # the end of the list simply takes everything that is available.
        remaining = pool_size - len(this_phase_codes)
        this_phase_codes += this_phase_option_codes[0:remaining]

        # This period's pool is the previous pool of the next iteration
        last_phase_codes = this_phase_codes

        # Store in the result
        rebalance_date_codes_dict[rebalance_date] = this_phase_codes
        rebalance_dates.append(rebalance_date)

        print('当前最终的备选票:%s' % rebalance_date, flush=True)
        print(this_phase_codes, flush=True)

    return rebalance_dates, rebalance_date_codes_dict
def analyze(self):
    """
    Run the factor analysis over [self.begin_date, self.end_date].

    Every ``self.interval`` trading days the stocks are ranked by
    ``self.factor``, the profit since the previous adjustment is computed,
    and the top and bottom groups are re-adjusted. Finally the
    ``portfolio`` column (top minus bottom) is added to ``self.profit_df``
    and the result is drawn.
    """
    # Interfaces to the data-management and factor-management subsystems
    data_module = DataModule()
    factor_module = FactorModule()

    # Trading dates of the analysis period
    trading_dates = get_trading_dates(self.begin_date, self.end_date)

    # Benchmark ('000300') close on the first day
    first_day = trading_dates[0]
    hs300_first_day = data_module.get_k_data(
        '000300', index=True, begin_date=first_day, end_date=first_day)
    self.hs300_first_value = hs300_first_day.loc[0]['close']

    # Top and bottom groups: stock code -> backward-adjusted price
    top_dailies = dict()
    bottom_dailies = dict()

    # Previous adjustment date (none before the first adjustment)
    previous_adjust_date = None

    for offset in range(0, len(trading_dates), self.interval):
        adjust_date = trading_dates[offset]

        # Factor values, ordered as configured and indexed by code
        factor_frame = factor_module.get_single_date_factors(
            self.factor, adjust_date)
        if factor_frame.index.size == 0:
            continue
        factor_frame.sort_values(self.factor, ascending=self.ascending,
                                 inplace=True)
        factor_frame.set_index(['code'], inplace=True)

        # Backward-adjusted quotes of all stocks on this date, indexed by code
        daily_frame = data_module.get_one_day_k_data(autype='hfq',
                                                     date=adjust_date)
        daily_frame.set_index(['code'], inplace=True)

        # Profit is computed on the full quotes, before suspended stocks
        # are removed
        self.compute_profit(previous_adjust_date, daily_frame, top_dailies,
                            bottom_dailies, adjust_date)

        # Drop suspended stocks
        tradable_frame = daily_frame[daily_frame['is_trading']]

        # Number of stocks in each group
        stocks_per_group = int(tradable_frame.index.size / self.position)

        # Adjust the top and the bottom group
        self.adjust_top_position(top_dailies, factor_frame, tradable_frame,
                                 stocks_per_group)
        self.adjust_bottom_position(bottom_dailies, factor_frame,
                                    tradable_frame, stocks_per_group)

        previous_adjust_date = adjust_date

    # Return of the zero-investment portfolio: top minus bottom
    long_short = self.profit_df['top'] - self.profit_df['bottom']
    self.profit_df['portfolio'] = long_short

    self.draw()