Exemplo n.º 1
0
 def __init__(self,
              coin_pair: CoinPair,
              time_frame,
              exchange_type,
              start_date='2020-03-03 00:00:00',
              end_date=None,
              per_limit=1000,
              database='spot_market'):
     """Prepare the time range, fetching configuration and storage target.

     :param coin_pair: trading pair; forwarded to the fetching configuration
         and rendered with ``formatted('_')`` for the measurement name.
     :param time_frame: candle time frame; its ``value`` attribute becomes
         part of the measurement name.
     :param exchange_type: exchange identifier handed to ``CCXTProvider``.
     :param start_date: start of the range as a date string.
     :param end_date: end of the range as a date string; ``None`` means
         "now" at construction time.
     :param per_limit: passed to the configuration as ``batch_limit``.
     :param database: stored as ``database_name``.
     """
     # Time range: both bounds end up timezone-aware via astimezone().
     parsed_start = DateFormatter.convert_string_to_local_date(start_date)
     self.start_date = parsed_start.astimezone()
     if end_date is None:
         parsed_end = datetime.now()
     else:
         parsed_end = DateFormatter.convert_string_to_local_date(end_date)
     self.end_date = parsed_end.astimezone()

     # Configuration used for fetching. The provider is created with empty
     # credential strings.
     provider = CCXTProvider("", "", exchange_type)
     self.provider = provider
     # NOTE(review): the meaning of the positional ``1`` is defined by
     # ExchangeFetchingConfiguration, which is not visible here.
     self.config = ExchangeFetchingConfiguration(
         coin_pair,
         time_frame,
         1,
         ExchangeFetchingType.FILL_RECENTLY,
         batch_limit=per_limit)
     self.fetcher = ExchangeFetcher(provider)

     # Table (measurement) name and target database.
     pair_label = coin_pair.formatted('_')
     self.measurement_name = influx_market_measurement_name(
         provider.display_name, pair_label, 'spot', time_frame.value)
     self.database_name = database
Exemplo n.º 2
0
 def __init__(self,
              coin_pair,
              time_frame,
              exchange_type,
              start_date='2020-03-03 00:00:00',
              end_date=None,
              per_limit=1000):
     """Set up the time range, the fetcher and the candle record class.

     :param coin_pair: trading pair to fetch.
     :param time_frame: candle time frame.
     :param exchange_type: exchange identifier handed to ``CCXTProvider``.
     :param start_date: start of the range as a date string.
     :param end_date: end of the range as a date string; when ``None`` the
         current time is used.
     :param per_limit: passed to the configuration as ``batch_limit``.
     """
     to_local = DateFormatter.convert_string_to_local_date

     # Time range (made timezone-aware with astimezone()).
     self.start_date = to_local(start_date).astimezone()
     range_end = datetime.now() if end_date is None else to_local(end_date)
     self.end_date = range_end.astimezone()

     # Configuration used for fetching.
     self.provider = CCXTProvider("", "", exchange_type)
     fetch_kind = ExchangeFetchingType.FILL_RECENTLY
     self.config = ExchangeFetchingConfiguration(
         coin_pair, time_frame, 1, fetch_kind, batch_limit=per_limit)
     self.fetcher = ExchangeFetcher(self.provider)

     # Record class (table) derived from exchange name, pair and time frame.
     exchange_name = self.provider.display_name
     self.candle_cls = candle_record_class_with_components(
         exchange_name, coin_pair, time_frame)
Exemplo n.º 3
0
    def calculation(self, param):
        """Run one backtest for a single parameter set.

        Builds an identifier from the strategy name, the sorted parameters,
        the time frame and the first/last candle dates. If an overview with
        the same task and parameter identifiers is already stored, the run
        is skipped.

        :param param: mapping of strategy keyword arguments.
        :return: the result of ``StrategyBacktest.perform_test()``, or
            ``None`` when the run is skipped or fails.
        """
        try:
            connect_db_env(db_name=DB_BACKTEST)
            strategy = self.__strategy_cls(**param)

            # "key:value" pairs in sorted key order, joined by ";".
            pairs = []
            for key in sorted(param.keys()):
                pairs.append("{}:{}".format(key, param[key]))
            param_str = ";".join(pairs)

            # Date span taken from the first and last rows of the data.
            first_row = self.__df.iloc[0]
            start_date = dfr.convert_local_date_to_string(
                first_row[COL_CANDLE_BEGIN_TIME], "%Y%m%d")
            last_row = self.__df.iloc[-1]
            end_date = dfr.convert_local_date_to_string(
                last_row[COL_CANDLE_BEGIN_TIME], "%Y%m%d")
            date_span = "{},{}".format(start_date, end_date)

            param_identifier = "{}|{}|{}|{}".format(
                strategy.name, param_str, self.__time_frame, date_span)

            # Skip parameter sets that already have a stored overview.
            query = {
                'task_identifier': self.__task_identifier,
                'param_identifier': param_identifier
            }
            already_stored = list(BacktestOverview.objects.raw(query))
            if already_stored:
                print("{} 已存在,跳过".format(param_identifier))
                self.unified_exit_handler(True)
                return None

            context = {'param_identifier': param_identifier}
            # The backtest works on a copy so the shared frame stays intact.
            backtest = StrategyBacktest(self.__df.copy(), strategy,
                                        self.__position_func,
                                        self.__evaluation_func,
                                        self.__result_handler, context)
            return backtest.perform_test()
        except Exception as e:
            # Best-effort boundary: report the failure, print it, carry on.
            self.unified_exit_handler(False, '回测执行失败', str(param))
            print(str(e))
            return None
Exemplo n.º 4
0
 def fetch_historical_candle_data_by_end_date(self,
                                              coin_pair: CoinPair,
                                              time_frame: TimeFrame,
                                              end_date,
                                              limit,
                                              params=None):
     """Request candles by counting backwards from an end date.

     :param coin_pair: trading pair to fetch.
     :param time_frame: candle time frame; also used to move the end
         timestamp back by ``limit`` candles.
     :param end_date: end of the requested range.
     :param limit: number of candles wanted.
     :param params: optional extra request parameters forwarded to the
         underlying fetch call; a fresh empty dict is used when omitted.
     :return: whatever the delegated fetch method returns.
     """
     # A literal ``{}`` default would be one dict shared by every call and
     # handed to the downstream fetchers, so build a new one per call.
     if params is None:
         params = {}

     since_ts = dfr.convert_local_date_to_timestamp(end_date)
     since_ts = time_frame.timestamp_backward_offset(since_ts, limit)
     if limit < 900:
         # Ask for 10 extra candles beyond the requested limit.
         return self.fetch_historical_candle_data(coin_pair, time_frame,
                                                  since_ts, limit + 10,
                                                  params)
     # NOTE(review): for limit >= 900 the real-time endpoint is used and
     # end_date / since_ts are not passed on — confirm this is intended.
     return self.fetch_real_time_candle_data(coin_pair, time_frame,
                                             limit, params)
Exemplo n.º 5
0
 def __fill_recently(self):
     """Fetch repeatedly until the most recent data has been filled in.

     Each round restarts 10 time-frame units before the newest stored date
     and stops as soon as ``__perform_fetching`` reports nothing more to do.
     """
     time_frame = self.configuration.time_frame
     while True:
         assert self.get_latest_date is not None
         # Newest date already saved, converted to a timestamp.
         latest_saved = self.get_latest_date()
         latest_ts = DateFormatter.convert_local_date_to_timestamp(latest_saved)
         # Step back 10 units to get the starting point of this round.
         start_ts = time_frame.timestamp_backward_offset(latest_ts, 10)
         if not self.__perform_fetching(start_ts):
             print('FillRecently.finished')
             return
         # More rounds are needed: pause before the next request.
         time.sleep(self.configuration.sleep_duration)
Exemplo n.º 6
0
 def __fetch_historical_data(self):
     """Fetch history backwards, one batch per round, until finished.

     Each round starts ``batch_limit`` time-frame units before the earliest
     stored date and continues while ``__perform_fetching`` returns a truthy
     value.
     """
     has_more = True
     while has_more:
         assert self.get_earliest_date is not None
         # Earliest stored date as a timestamp.
         oldest_saved = self.get_earliest_date()
         oldest_ts = DateFormatter.convert_local_date_to_timestamp(oldest_saved)
         # Starting timestamp of this batch.
         config = self.configuration
         since_ts = config.time_frame.timestamp_backward_offset(
             oldest_ts, config.batch_limit)
         has_more = self.__perform_fetching(since_ts)
         if has_more:
             # Pause between rounds while there is more to fetch.
             time.sleep(self.configuration.sleep_duration)
     print('Historical.finished')
Exemplo n.º 7
0
 def record_procedure(self, content):
     """Print ``content`` prefixed with a ``[HH:MM:SS] `` stamp and a blank line after it."""
     stamp = DateFormatter.now_date_string('[%H:%M:%S] ')
     print(stamp + content, end='\n\n')
Exemplo n.º 8
0
def __prepare_one_hold(df, _back_hours, _hold_hour, diff_d=None):
    """Add look-back factor columns for one symbol and resample to one holding period.

    For every window ``n`` in ``_back_hours`` a family of ``<factor>_bh_<n>``
    columns is written into ``df``, and each factor is also handed to
    ``__add_diff`` together with ``diff_d``. The frame is then resampled to
    ``_hold_hour`` once per offset, and all offsets are concatenated.

    :param df: candle DataFrame of a single symbol. The code reads the
        columns ``candle_begin_time``, ``symbol``, ``open``, ``high``,
        ``low``, ``close``, ``avg_price``, ``volume``, ``quote_volume``,
        ``taker_buy_quote_asset_volume`` and ``trade_num``. The frame is
        modified in place (factor columns are added to it).
    :param _back_hours: iterable of integer look-back window sizes. The
        original author recommends a minimum of 3 because rolling skew
        needs at least 3 rows.
    :param _hold_hour: holding period as a resample rule whose last
        character is the unit, e.g. ``'2H'``; ``int(_hold_hour[:-1])``
        offsets are generated (``'2H'`` -> 0, 1; ``'3H'`` -> 0, 1, 2).
    :param diff_d: values forwarded to ``__add_diff`` as ``_d_list``
        (presumably differencing orders — confirm against ``__add_diff``).
        Defaults to ``[0.3, 0.5]``.
    :return: DataFrame with the required fields, the factor fields, an
        ``offset`` label and the adaptive Bollinger columns, restricted to
        rows from 2020-09-01 (UTC) onwards.
    """
    # A list literal as default would be one object shared by every call
    # (and by __add_diff); create a fresh one per call instead.
    if diff_d is None:
        diff_d = [0.3, 0.5]

    df['涨跌幅'] = df['close'].pct_change()  # per-candle return
    # avg_price of the next candle, i.e. the price paid when buying at the
    # next candle
    df['下个周期_avg_price'] = df['avg_price'].shift(-1)
    df.loc[df['volume'] == 0, '是否交易'] = 0  # candles without any trading
    # Assign the result back instead of using inplace=True on a column
    # selection, which is not guaranteed to write through to df.
    df['是否交易'] = df['是否交易'].fillna(value=1)

    # ******************** factor definitions start (edit below) ********************
    # ===== selection factors
    extra_agg_dict = dict()

    def _register_factor(name):
        """Aggregate factor *name* with 'first' and add its differenced variants."""
        extra_agg_dict[name] = 'first'
        __add_diff(_df=df,
                   _d_list=diff_d,
                   _name=name,
                   _agg_dict=extra_agg_dict,
                   _agg_type='first')

    # ===== technical indicators
    # --- KDJ ---
    for n in _back_hours:
        # Lowest low / highest high over the last n rows (current included)
        low_list = df['low'].rolling(n, min_periods=1).min()
        high_list = df['high'].rolling(n, min_periods=1).max()
        # Raw stochastic value
        rsv = (df['close'] - low_list) / (high_list - low_list) * 100
        df[f'K_bh_{n}'] = rsv.ewm(com=2).mean().shift(1)  # K
        extra_agg_dict[f'K_bh_{n}'] = 'first'
        df[f'D_bh_{n}'] = df[f'K_bh_{n}'].ewm(com=2).mean()  # D
        extra_agg_dict[f'D_bh_{n}'] = 'first'
        df[f'J_bh_{n}'] = 3 * df[f'K_bh_{n}'] - 2 * df[f'D_bh_{n}']  # J
        extra_agg_dict[f'J_bh_{n}'] = 'first'

        # Differencing is applied to the finished KDJ values. Original
        # author's note: KDJ on differenced candles had a larger std and was
        # rejected; differencing the indicator had a smaller std and was kept.
        for _ind in ['K', 'D', 'J']:
            __add_diff(_df=df,
                       _d_list=diff_d,
                       _name=f'{_ind}_bh_{n}',
                       _agg_dict=extra_agg_dict,
                       _agg_type='first')

    # --- RSI --- (original author's note: very effective in futures markets)
    close_dif = df['close'].diff()
    df['up'] = np.where(close_dif > 0, close_dif, 0)
    df['down'] = np.where(close_dif < 0, abs(close_dif), 0)
    for n in _back_hours:
        a = df['up'].rolling(n).sum()
        b = df['down'].rolling(n).sum()
        df[f'RSI_bh_{n}'] = (a / (a + b)).shift(1)  # RSI
        _register_factor(f'RSI_bh_{n}')

    del df['up'], df['down']  # drop the temporary columns

    # ===== common variables
    # --- average price --- (original author's note: matches a low-price
    # strategy, expected to be of little use; every close-based factor below
    # could be tried with the average price instead)
    for n in _back_hours:
        df[f'均价_bh_{n}'] = (df['quote_volume'].rolling(n).sum() /
                            df['volume'].rolling(n).sum()).shift(1)
        _register_factor(f'均价_bh_{n}')

    # --- return over n rows ---
    for n in _back_hours:
        df[f'涨跌幅_bh_{n}'] = df['close'].pct_change(n).shift(1)
        _register_factor(f'涨跌幅_bh_{n}')

    # --- bias --- relative distance of the close from its moving average
    for n in _back_hours:
        ma = df['close'].rolling(n, min_periods=1).mean()
        df[f'bias_bh_{n}'] = (df['close'] / ma - 1).shift(1)
        _register_factor(f'bias_bh_{n}')

    # --- amplitude --- from highest high and lowest low
    for n in _back_hours:
        high = df['high'].rolling(n, min_periods=1).max()
        low = df['low'].rolling(n, min_periods=1).min()
        df[f'振幅_bh_{n}'] = (high / low - 1).shift(1)
        _register_factor(f'振幅_bh_{n}')

    # --- amplitude 2 --- from open/close only
    # Keep the per-row base series untouched: rolling over the result of a
    # previous iteration would stack the windows of successive n values.
    body_high = df[['close', 'open']].max(axis=1)
    body_low = df[['close', 'open']].min(axis=1)
    for n in _back_hours:
        high = body_high.rolling(n, min_periods=1).max()
        low = body_low.rolling(n, min_periods=1).min()
        df[f'振幅2_bh_{n}'] = (high / low - 1).shift(1)
        _register_factor(f'振幅2_bh_{n}')

    # --- std of returns --- another form of amplitude
    change = df['close'].pct_change()
    for n in _back_hours:
        df[f'涨跌幅std_bh_{n}'] = change.rolling(n).std().shift(1)
        _register_factor(f'涨跌幅std_bh_{n}')

    # --- skew of returns --- (original author's note: effective in
    # commodity futures; rolling skew needs a window of at least 3)
    for n in _back_hours:
        df[f'涨跌幅skew_bh_{n}'] = change.rolling(n).skew().shift(1)
        _register_factor(f'涨跌幅skew_bh_{n}')

    # --- quote volume --- (original author's note: small-cap concept)
    for n in _back_hours:
        df[f'成交额_bh_{n}'] = df['quote_volume'].rolling(
            n, min_periods=1).sum().shift(1)
        _register_factor(f'成交额_bh_{n}')

    # --- std of quote volume --- (original author's note: the most
    # effective of the "191" stock-selection factors)
    for n in _back_hours:
        df[f'成交额std_bh_{n}'] = df['quote_volume'].rolling(
            n, min_periods=2).std().shift(1)
        _register_factor(f'成交额std_bh_{n}')

    # --- taker-buy share of quote volume --- (original author's note:
    # data only available from Binance)
    for n in _back_hours:
        volume = df['quote_volume'].rolling(n, min_periods=1).sum()
        buy_volume = df['taker_buy_quote_asset_volume'].rolling(
            n, min_periods=1).sum()
        df[f'资金流入比例_bh_{n}'] = (buy_volume / volume).shift(1)
        _register_factor(f'资金流入比例_bh_{n}')

    # --- volume ratio --- current quote volume over its rolling mean
    for n in _back_hours:
        df[f'量比_bh_{n}'] = (
            df['quote_volume'] /
            df['quote_volume'].rolling(n, min_periods=1).mean()).shift(1)
        _register_factor(f'量比_bh_{n}')

    # --- number of trades ---
    for n in _back_hours:
        df[f'成交笔数_bh_{n}'] = df['trade_num'].rolling(
            n, min_periods=1).sum().shift(1)
        _register_factor(f'成交笔数_bh_{n}')

    # --- price/volume correlation ---
    for n in _back_hours:
        df[f'量价相关系数_bh_{n}'] = df['close'].rolling(n).corr(
            df['quote_volume']).shift(1)
        _register_factor(f'量价相关系数_bh_{n}')

    # --- Angle --- linear-regression angle of the moving average
    for n in _back_hours:
        column_name = f'Angle_bh_{n}'
        ma = df['close'].rolling(window=n, min_periods=1).mean()
        df[column_name] = ta.LINEARREG_ANGLE(ma, n)
        df[column_name] = df[column_name].shift(1)
        _register_factor(column_name)

    # ---- GapTrue ---- gap between WMA and MA, scaled by its rolling abs sum
    for n in _back_hours:
        ma = df['close'].rolling(window=n, min_periods=1).mean()
        wma = ta.WMA(df['close'], n)
        gap = wma - ma
        column_name = f'GapTrue_bh_{n}'
        df[column_name] = gap / abs(gap).rolling(window=n).sum()
        df[column_name] = df[column_name].shift(1)
        _register_factor(column_name)

    # ---- 癞子 ---- one-row return scaled by the rolling sum of abs returns
    for n in _back_hours:
        diff = df['close'] / df['close'].shift(1) - 1
        column_name = f'癞子_bh_{n}'
        df[column_name] = diff / abs(diff).rolling(window=n).sum()
        df[column_name] = df[column_name].shift(1)
        _register_factor(column_name)

    # ---- CCI ---- WMA-based variant
    for n in _back_hours:
        oma = ta.WMA(df.open, n)
        hma = ta.WMA(df.high, n)
        lma = ta.WMA(df.low, n)
        cma = ta.WMA(df.close, n)
        tp = (hma + lma + cma + oma) / 4
        ma = ta.WMA(tp, n)
        md = ta.WMA(abs(cma - ma), n)

        column_name = f'CCI_bh_{n}'
        df[column_name] = (tp - ma) / md
        df[column_name] = df[column_name].shift(1)
        _register_factor(column_name)
    # ******************** factor definitions end (edit above) ********************

    # ===== resample to the holding period
    # Prepend a copy of the row labelled 0 and stamp it 2020-01-01 00:00:00
    # UTC. Original author's note: this keeps resampling consistent for
    # periods longer than 24h.
    # NOTE(review): DataFrame.append and resample(base=...) below only exist
    # in pandas < 2.0. They are kept because the original notes say `base`
    # and `offset` only agree for periods within 24h.
    df = df.loc[0:0, :].append(df, ignore_index=True)
    df.loc[0, 'candle_begin_time'] = dfr.convert_string_to_local_date(
        '2020-01-01 00:00:00').replace(tzinfo=pytz.utc)

    # Keep the period start as a regular column, then index by time
    df['周期开始时间'] = df['candle_begin_time']
    df.set_index('candle_begin_time', inplace=True)

    # Required fields
    agg_dict = {
        'symbol': 'first',
        '周期开始时间': 'first',
        'open': 'first',
        'avg_price': 'first',
        'close': 'last',
        '下个周期_avg_price': 'last',
        'volume': 'sum',
    }
    # Columns to keep: required fields + factor fields
    agg_dict = dict(agg_dict, **extra_agg_dict)

    # Resample once per offset. Original author's reasoning: whatever the
    # holding period, the offsets together cover every whole hour, so the
    # merged frame has the same number of rows per day (symbols * 24).
    #   2H: offset0 rebalances at 0,2,...,22; offset1 at 1,3,...,23
    #   3H: offset0 at 0,3,...,21; offset1 at 1,4,...,22; offset2 at 2,5,...,23
    #   periods longer than a day behave the same way (48H -> 48 offsets)
    # Hence the merged output is about the same size for every holding period.
    period_df_list = []
    for offset in range(int(_hold_hour[:-1])):
        # Resampled frame holds the required fields + factor fields. Bins
        # start at the 2020-01-01 row added above, so the leading bins before
        # real data are mostly empty.
        # Alternative noted by the original author (same as `base` within 24h):
        # period_df = df.resample(_hold_hour, offset=f'{offset}h').agg(agg_dict)
        period_df = df.resample(_hold_hour, base=offset).agg(agg_dict)

        # The empty leading rows are not removed here; the date filter at the
        # end of the function drops them.
        # Offset label: 2H -> 0, 1; 3H -> 0, 1, 2; ...
        period_df['offset'] = offset

        # Original adaptive Bollinger channel
        n = 34
        period_df['close_shift'] = period_df['close'].shift(1)
        period_df['median'] = period_df['close_shift'].rolling(window=n).mean()
        period_df['std'] = period_df['close_shift'].rolling(
            n, min_periods=1).std(ddof=0)  # ddof: degrees of freedom of std
        period_df['z_score'] = abs(period_df['close_shift'] -
                                   period_df['median']) / period_df['std']
        period_df['up'] = period_df['z_score'].rolling(
            window=n, min_periods=1).max().shift(1)
        period_df['dn'] = period_df['z_score'].rolling(
            window=n, min_periods=1).min().shift(1)
        # Both bands are built from 'up'; 'dn' is computed but not used here.
        period_df[
            'upper'] = period_df['median'] + period_df['std'] * period_df['up']
        period_df[
            'lower'] = period_df['median'] - period_df['std'] * period_df['up']
        # Below the lower band: do not go long
        period_df['condition_long'] = period_df['close_shift'] >= period_df[
            'lower']
        # Above the upper band: do not go short
        period_df['condition_short'] = period_df['close_shift'] <= period_df[
            'upper']

        period_df.reset_index(inplace=True)

        # Collect this offset's frame
        period_df_list.append(period_df)

    # Merge all offsets into one table
    period_df = pd.concat(period_df_list, ignore_index=True)
    period_df.reset_index(inplace=True)

    # Trim: drop the first 24 rows, then everything before 2020-09-01 UTC
    period_df = period_df.iloc[24:]
    period_df = period_df[period_df['candle_begin_time'] >= pd.to_datetime(
        '2020-09-01').tz_localize(pytz.utc)]
    period_df.reset_index(drop=True, inplace=True)
    del period_df['index']  # helper column created by reset_index above
    return period_df