示例#1
0
def build(ohlcv: pd.DataFrame, **kwargs):
    """Assemble lagged OHLC prices together with technical-analysis features.

    Args:
        ohlcv: Candle frame; the ``open``, ``high``, ``low`` and ``close``
            columns are used for the price history, and the whole frame is
            handed to ``get_ta_features``.
        **kwargs: ``W`` (default 10) is the number of lagged copies of the
            price frame to append.

    Returns:
        The inner join (on the index) of the price frame, its ``W`` lagged
        versions and the frame produced by ``get_ta_features``.
    """
    window = kwargs.get('W', 10)
    join_opts = {
        'axis': 'columns',
        'verify_integrity': True,
        'sort': True,
        'join': 'inner',
    }

    prices = ohlcv[['open', 'high', 'low', 'close']]

    # Start from the unshifted prices, then append one frame per lag.
    # NOTE(review): make_lagged presumably shifts the frame by `lag` rows and
    # renames its columns (verify_integrity would reject duplicates) - confirm.
    history = [prices]
    for lag in range(1, window + 1):
        history.append(make_lagged(prices, lag))
    price_history = pd.concat(history, **join_opts)

    indicators = get_ta_features(ohlcv, TA_CONFIG)
    return pd.concat([price_history, indicators], **join_opts)
示例#2
0
def build(ohlcv: pd.DataFrame, coinmetrics: pd.DataFrame, **kwargs):
    """Build the merged feature matrix from candles, TA indicators and on-chain data.

    Several facets are computed independently (lagged OHLC, lagged OHLCV
    percent changes, TA-Lib pattern aggregates, STL residuals, spline
    derivatives, candle statistics, the full TA feature set, engineered TA
    picks and, optionally, engineered on-chain picks). Columns that contain
    nothing but NaN/inf values are removed from each facet, then the facets
    are inner-joined on the index.

    Args:
        ohlcv: Candle frame with ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns.
        coinmetrics: On-chain metrics frame, or ``None``. It is only used when
            it is fewer than 60 rows shorter than ``ohlcv``; only the metric
            columns that are actually present contribute features.
        **kwargs: ``W`` (default 10) is the number of lags appended to the
            OHLC and OHLCV-percent-change facets.

    Returns:
        A DataFrame holding all facets side by side, restricted to the index
        labels shared by every facet.
    """
    W = kwargs.get('W', 10)
    price_cols = ['open', 'high', 'low', 'close']
    concat_opts = {
        'axis': 'columns',
        'verify_integrity': True,
        'sort': True,
        'join': 'inner',
    }

    # ATSA - OHLC with W lags + TA
    ohlc = ohlcv[price_cols]
    lagged_ohlc = pd.concat(
        [ohlc] + [make_lagged(ohlc, i) for i in range(1, W + 1)],
        **concat_opts)
    ta = get_ta_features(ohlcv, TA_CONFIG)

    # Lagged percent variation of OHLCV
    ohlcv_pct = ohlcv[price_cols + ['volume']].pct_change()
    ohlcv_pct.columns = ['{}_pct'.format(c) for c in ohlcv_pct.columns]
    lagged_ohlcv_pct = pd.concat(
        [ohlcv_pct] + [make_lagged(ohlcv_pct, i) for i in range(1, W + 1)],
        **concat_opts)

    # TALib patterns on OHLCV data, summarised row-wise
    _patterns = get_talib_patterns(ohlcv)
    ohlc_patterns = pd.DataFrame(index=ohlcv.index)
    ohlc_patterns['talib_patterns_mean'] = _patterns.mean(axis=1)
    ohlc_patterns['talib_patterns_sum'] = _patterns.sum(axis=1)

    # Residual from STL decomposition of OHLC data.
    # The frame starts without an index, so it adopts whatever index
    # get_residual returns for the first column.
    ohlc_residuals = pd.DataFrame()
    for col in price_cols:
        ohlc_residuals['{}_resid'.format(col)] = get_residual(ohlcv[col])

    # SPLINES: first derivative indicates slope, second indicates convexity
    ohlc_splines = pd.DataFrame(index=ohlcv.index)
    for order in (1, 2):
        for col in price_cols:
            ohlc_splines['{}_spl_d{}'.format(col, order)] = get_spline(
                ohlcv[col], order)

    # Relevant stats from OHLC data interpretation
    ohlcv_stats = pd.DataFrame(index=ohlcv.index)
    # Change in body of the candle (> 0 if candle is green)
    ohlcv_stats['close_open_pct'] = (ohlcv.close - ohlcv.open).pct_change()
    # Change in wick size of the candle, shorter wick should be bullish
    ohlcv_stats['high_close_dist_pct'] = (ohlcv.high -
                                          ohlcv.close).pct_change()
    # Change in shadow size of the candle, this increasing would indicate
    # support (maybe a bounce)
    ohlcv_stats['low_close_dist_pct'] = (ohlcv.close - ohlcv.low).pct_change()
    # Change in total candle size, smaller candles stand for low volatility
    ohlcv_stats['high_low_dist_pct'] = (ohlcv.high - ohlcv.low).pct_change()

    # Rolling volatility of close returns; the returns are computed once and
    # shared by every window.
    # Longer windows (90/180/360 days) were dropped because they exclude too
    # much data; re-enable them only when building with more data.
    close_returns = ohlcv.close.pct_change()
    for days in (3, 7, 30):
        ohlcv_stats['close_volatility_{}d'.format(days)] = (
            close_returns.rolling(days).std(ddof=0))

    # Same candle-shape stats, computed on resampled OHLC data
    for d in [3, 7, 30]:
        ohlcv_d = ohlcv_resample(ohlcv=ohlcv, period=d, interval='D')
        ohlcv_stats['close_open_pct_d{}'.format(d)] = (
            ohlcv_d.close - ohlcv_d.open).pct_change()
        ohlcv_stats['high_close_dist_pct_d{}'.format(d)] = (
            ohlcv_d.high - ohlcv_d.close).pct_change()
        ohlcv_stats['low_close_dist_pct_d{}'.format(d)] = (
            ohlcv_d.close - ohlcv_d.low).pct_change()
        ohlcv_stats['high_low_dist_pct_d{}'.format(d)] = (
            ohlcv_d.high - ohlcv_d.low).pct_change()

    # Cherry-picked and engineered features from technical indicators
    ta_picks = pd.DataFrame(index=ta.index)
    # REMA / RSMA are already used and well-established in ATSA. Take the pct
    # change to encode the relative movement of the averages rather than their
    # positions. Other dimensions are dropped since they are correlated.
    ta_picks['rema_8_15_pct'] = ta.rema_8_15.pct_change()
    ta_picks['rsma_8_15_pct'] = ta.rsma_8_15.pct_change()

    # Stoch is a momentum oscillator bounded in 0-100 that compares a closing
    # price to its range over a period. Its sensitivity is reduced here with a
    # 3-period mean, and it is divided by 100 to get a fractional value.
    ta_picks['stoch_14_mean3_div100'] = ta.stoch_14.rolling(3).mean() / 100

    # MACD is the 12-period EMA minus the 26-period EMA; a 9-period EMA of the
    # MACD is its "signal line". Crossovers between the two are commonly read
    # as buy/sell triggers, so encode the signal line, the change of the
    # MACD-signal distance and the slope of the MACD itself.
    signal_line = exponential_moving_average(ta.macd_12_26, 9)
    ta_picks['macd_12_26_signal'] = signal_line
    ta_picks['macd_12_26_diff_signal'] = (ta.macd_12_26 -
                                          signal_line).pct_change()
    ta_picks['macd_12_26_pct'] = ta.macd_12_26.pct_change()

    # PPO is like MACD but measures the percentage (not absolute) difference
    # between the two EMAs.
    signal_line = exponential_moving_average(ta.ppo_12_26, 9)
    ta_picks['ppo_12_26_signal'] = signal_line
    ta_picks['ppo_12_26_diff_signal'] = (ta.ppo_12_26 -
                                         signal_line).pct_change()
    ta_picks['ppo_12_26_pct'] = ta.ppo_12_26.pct_change()

    # ADI (accumulation/distribution) is a cumulative volume/price indicator;
    # divergences between price and ADI hint at weak trends.
    # IDEA: compare the direction of price and ADI over a short window, and
    # also encode the ADI slope (pct_change).
    ta_picks['adi_pct'] = ta.adi.pct_change()
    ta_picks['adi_close_convergence'] = convergence_between_series(
        ta.adi, ohlcv.close, 3)

    # RSI goes from 0 to 100 (<= 20 read as BUY, >= 80 as SELL). Divide by 100
    # to get a fractional feature; a pct_change would make no sense here.
    ta_picks['rsi_14_div100'] = ta.rsi_14 / 100

    # MFI is an overbought/oversold oscillator (0-100) built from price and
    # volume; above 80 is considered overbought, below 20 oversold.
    ta_picks['mfi_14_div100'] = ta.mfi_14 / 100

    # CMO oscillates between -100 and +100. A 10-period moving average is
    # commonly used as its signal line: crossing above is bullish, dropping
    # below is bearish.
    ta_picks['cmo_14_div100'] = ta.cmo_14 / 100
    signal_line = simple_moving_average(ta.cmo_14, 10)
    ta_picks['cmo_14_signal'] = signal_line
    ta_picks['cmo_14_diff_signal'] = (ta.cmo_14 - signal_line) / 100

    # OBV is cumulative, so its absolute value depends on the start date and
    # carries no meaning; only its slope matters => use percent change.
    ta_picks['obv_pct'] = ta.obv.pct_change()
    ta_picks['obv_mean3_pct'] = ta.obv.rolling(3).mean().pct_change()

    # The force index rises on strong rallies and falls on pullbacks and
    # sideways moves => encode its percent variation.
    ta_picks['fi_13_pct'] = ta.fi_13.pct_change()
    ta_picks['fi_50_pct'] = ta.fi_50.pct_change()

    # The Aroon Oscillator moves between -100 and 100; high values indicate an
    # uptrend, low values a downtrend.
    ta_picks['ao_14_div100'] = ta.ao_14 / 100

    # atrp_14 and pvo_12_26 are not re-picked here because the whole TA frame
    # is part of the merge below.

    merge_dataframes = [
        lagged_ohlc, lagged_ohlcv_pct, ohlc_patterns, ohlc_residuals,
        ohlc_splines, ohlcv_stats, ta, ta_picks
    ]

    # Cherry-picked and engineered features from blockchain data.
    # Only use them if the inner join would not cost too many rows:
    # coinmetrics must be fewer than 60 rows shorter than ohlcv.
    if coinmetrics is not None and (coinmetrics.shape[0] -
                                    ohlcv.shape[0]) > -60:
        cm_cols = coinmetrics.columns
        cm_picks = pd.DataFrame(index=ohlcv.index)
        if 'adractcnt' in cm_cols:
            cm_picks['adractcnt_pct'] = coinmetrics.adractcnt.pct_change()
        if ('txtfrvaladjntv' in cm_cols and 'isstotntv' in cm_cols
                and 'feetotntv' in cm_cols):
            # Miners' earnings (fees + issued coins) vs amount transacted in
            # the same interval
            cm_picks['earned_vs_transacted'] = (
                coinmetrics.isstotntv +
                coinmetrics.feetotntv) / coinmetrics.txtfrvaladjntv
        if 'isstotntv' in cm_cols:
            # isstotntv is the number of coins mined in the time interval;
            # compare it with the total mined over a trailing year
            total_mined = coinmetrics.isstotntv.rolling(
                365, min_periods=7).sum()
            cm_picks['isstot1_isstot365_pct'] = (coinmetrics.isstotntv /
                                                 total_mined).pct_change()
        if 'splycur' in cm_cols and 'isstotntv' in cm_cols:
            # splycur is the total number of coins mined (all time)
            cm_picks['splycur_isstot1_pct'] = (
                coinmetrics.isstotntv / coinmetrics.splycur).pct_change()
        if 'hashrate' in cm_cols:
            cm_picks['hashrate_pct'] = coinmetrics.hashrate.pct_change()
        if 'isstotntv' in cm_cols:
            cm_picks['isstotntv_pct'] = coinmetrics.isstotntv.pct_change()
        if 'feetotntv' in cm_cols:
            cm_picks['feetotntv_pct'] = coinmetrics.feetotntv.pct_change()
        if 'txtfrcnt' in cm_cols:
            cm_picks['txtfrcnt_pct'] = coinmetrics.txtfrcnt.pct_change()
        # roi30d, vtydayret30d, isscontpctann and the raw coinmetrics frame
        # are currently not included.
        merge_dataframes.append(cm_picks)

    # Drop, from each facet, the columns whose values are all NaN or +/-inf.
    # This is done with an explicit mask instead of the deprecated
    # 'mode.use_inf_as_na' option; surviving columns keep their original
    # values (including any inf), and the facets are filtered into new frames
    # rather than mutated in place.
    infinities = [float('inf'), float('-inf')]
    cleaned = []
    for facet in merge_dataframes:
        unusable = facet.isna() | facet.isin(infinities)
        # Positional boolean mask, so selection does not depend on labels.
        keep = ~unusable.all().to_numpy(dtype=bool)
        cleaned.append(facet.loc[:, keep])
    return pd.concat(cleaned, **concat_opts)
示例#3
0
def build(ohlcv: pd.DataFrame, coinmetrics: pd.DataFrame, **kwargs):
    """Build a compact feature matrix from candle stats, lagged changes and picks.

    The result combines four facets: simple candle statistics, lagged percent
    changes of close and volume, engineered on-chain features and engineered
    technical-indicator features.

    Args:
        ohlcv: Candle frame; the ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns are used.
        coinmetrics: On-chain metrics frame. Only the metric columns that are
            actually present contribute features.
        **kwargs: ``W`` (default 10) is the number of lags appended to the
            close/volume percent-change facet.

    Returns:
        A DataFrame holding the four facets side by side, restricted to the
        index labels shared by all of them.
    """
    W = kwargs.get('W', 10)
    ta = get_ta_features(ohlcv, TA_CONFIG)

    ohlcv_stats = pd.DataFrame(index=ohlcv.index)
    # Price oscillation range for the current day (high - low should always
    # be > 0), encoded as its percent change
    ohlcv_stats['day_range_pct'] = (ohlcv.high - ohlcv.low).pct_change()
    # Price direction for the day: green > 0, red < 0. Modulus is the body range.
    ohlcv_stats['direction'] = ohlcv.close - ohlcv.open

    cv_pct = ohlcv[['close', 'volume']].pct_change()
    lagged_cv_pct = pd.concat(
        [cv_pct] + [make_lagged(cv_pct, i) for i in range(1, W + 1)],
        axis='columns',
        verify_integrity=True,
        sort=True,
        join='inner'
    )

    # Cherry-picked and engineered features from blockchain data
    cm_picks = pd.DataFrame(index=ohlcv.index)
    if 'adractcnt' in coinmetrics.columns:
        cm_picks['adractcnt_pct'] = coinmetrics.adractcnt.pct_change()
    if 'txtfrvaladjntv' in coinmetrics.columns and 'isstotntv' in coinmetrics.columns and 'feetotntv' in coinmetrics.columns:
        # Miners' earnings (fees + issued coins) vs amount transacted in that interval
        cm_picks['earned_vs_transacted'] = (coinmetrics.isstotntv + coinmetrics.feetotntv) / coinmetrics.txtfrvaladjntv
    if 'isstotntv' in coinmetrics.columns:
        # isstotntv is the number of coins mined in the time interval;
        # compare it with the total mined over a trailing year
        total_mined = coinmetrics.isstotntv.rolling(365, min_periods=7).sum()
        cm_picks['isstot1_isstot365_pct'] = (coinmetrics.isstotntv / total_mined).pct_change()
    if 'splycur' in coinmetrics.columns and 'isstotntv' in coinmetrics.columns:
        # splycur is the total number of coins mined (all time)
        cm_picks['splycur_isstot1_pct'] = (coinmetrics.isstotntv / coinmetrics.splycur).pct_change()
    if 'hashrate' in coinmetrics.columns:
        cm_picks['hashrate_pct'] = coinmetrics.hashrate.pct_change()
    if 'roi30d' in coinmetrics.columns:
        cm_picks['roi30d'] = coinmetrics.roi30d
    if 'isstotntv' in coinmetrics.columns:
        cm_picks['isstotntv_pct'] = coinmetrics.isstotntv.pct_change()
    if 'feetotntv' in coinmetrics.columns:
        cm_picks['feetotntv_pct'] = coinmetrics.feetotntv.pct_change()
    if 'txtfrcount' in coinmetrics.columns:
        cm_picks['txtfrcount_pct'] = coinmetrics.txtfrcount.pct_change()
        # NOTE(review): this is computed exactly like 'txtfrcount_pct', so the
        # two columns are duplicates - confirm what it was meant to encode.
        cm_picks['txtfrcount_volume'] = coinmetrics.txtfrcount.pct_change()
    if 'vtydayret30d' in coinmetrics.columns:
        cm_picks['vtydayret30d'] = coinmetrics.vtydayret30d
    if 'isscontpctann' in coinmetrics.columns:
        cm_picks['isscontpctann'] = coinmetrics.isscontpctann

    # Cherry-picked and engineered features from technical indicators
    ta_picks = pd.DataFrame(index=ta.index)
    # REMA / RSMA are already used and well-established in ATSA. Take the pct
    # change to encode the relative movement of the averages rather than their
    # positions. Other dimensions are dropped since they are correlated.
    ta_picks['rema_8_15_pct'] = ta.rema_8_15.pct_change()
    # Must read the RSMA column: reading rema_8_15 here would just duplicate
    # the feature above.
    ta_picks['rsma_8_15_pct'] = ta.rsma_8_15.pct_change()

    # Stoch is a momentum oscillator bounded in 0-100 that compares a closing
    # price to its range over a period. Its sensitivity is reduced here with a
    # 3-period mean, and it is divided by 100 to get a fractional value.
    ta_picks['stoch_14_mean3_div100'] = ta.stoch_14.rolling(3).mean() / 100

    # MACD is the 12-period EMA minus the 26-period EMA; a 9-period EMA of the
    # MACD is its "signal line". Crossovers between the two are commonly read
    # as buy/sell triggers.
    signal_line = exponential_moving_average(ta.macd_12_26, 9)
    ta_picks['macd_12_26_signal'] = (ta.macd_12_26 - signal_line).pct_change()  # Relationship with signal line
    ta_picks['macd_12_26_pct'] = ta.macd_12_26.pct_change()  # Information about slope

    # PPO is like MACD but measures the percentage (not absolute) difference
    # between the two EMAs.
    signal_line = exponential_moving_average(ta.ppo_12_26, 9)
    ta_picks['ppo_12_26_signal'] = (ta.ppo_12_26 - signal_line).pct_change()  # Relationship with signal line
    ta_picks['ppo_12_26_pct'] = ta.ppo_12_26.pct_change()  # Information about slope

    # ADI (accumulation/distribution) is a cumulative volume/price indicator;
    # divergences between price and ADI hint at weak trends.
    # IDEA: compare the direction of price and ADI over a short window, and
    # also encode the ADI slope (pct_change).
    ta_picks['adi_pct'] = ta.adi.pct_change()
    ta_picks['adi_close_convergence'] = convergence_between_series(ta.adi, ohlcv.close, 3)

    # RSI goes from 0 to 100 (<= 20 read as BUY, >= 80 as SELL). Divide by 100
    # to get a fractional feature; a pct_change would make no sense here.
    ta_picks['rsi_14_div100'] = ta.rsi_14 / 100

    # MFI is an overbought/oversold oscillator (0-100) built from price and
    # volume; above 80 is considered overbought, below 20 oversold.
    ta_picks['mfi_14_div100'] = ta.mfi_14 / 100

    # CMO oscillates between -100 and +100. A 10-period moving average is
    # commonly used as its signal line: crossing above is bullish, dropping
    # below is bearish.
    ta_picks['cmo_14_div100'] = ta.cmo_14 / 100
    signal_line = simple_moving_average(ta.cmo_14, 10)
    ta_picks['cmo_14_signal'] = (ta.cmo_14 - signal_line) / 100

    # OBV is cumulative, so its absolute value depends on the start date and
    # carries no meaning; only its slope matters => use percent change.
    ta_picks['obv_pct'] = ta.obv.pct_change()
    ta_picks['obv_mean3_pct'] = ta.obv.rolling(3).mean().pct_change()

    # The force index rises on strong rallies and falls on pullbacks and
    # sideways moves => encode its percent variation.
    ta_picks['fi_13_pct'] = ta.fi_13.pct_change()
    ta_picks['fi_50_pct'] = ta.fi_50.pct_change()

    # The Aroon Oscillator moves between -100 and 100; high values indicate an
    # uptrend, low values a downtrend. Stored divided by 100.
    ta_picks['ao_14'] = ta.ao_14 / 100

    # The average true range (ATR) measures market volatility.
    # ATRP is pct_change of volatility.
    ta_picks['atrp_14'] = ta.atrp_14

    # The Percentage Volume Oscillator (PVO) compares fast and slow volume
    # moving averages. It is based solely on volume, so it cannot be used
    # alone to predict changes in a trend.
    ta_picks['pvo_12_26'] = ta.pvo_12_26

    return pd.concat(
        [ohlcv_stats, lagged_cv_pct, cm_picks, ta_picks],
        axis='columns',
        verify_integrity=True,
        sort=True,
        join='inner'
    )
示例#4
0
def build(ohlcv: pd.DataFrame, coinmetrics: pd.DataFrame, **kwargs):
    """Build a feature matrix organised in five facets.

    The facets are: price history (lagged STL residuals of OHLC), trend,
    volatility, volume and on-chain. Columns that contain nothing but NaN/inf
    values are removed from each facet, then the facets are inner-joined on
    the index.

    Args:
        ohlcv: Candle frame; the ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns are used.
        coinmetrics: On-chain metrics frame. Requested metric columns that are
            missing come back from ``reindex`` as all-NaN and are therefore
            dropped again by the cleaning step.
        **kwargs: ``W`` (default 10) is the number of lags appended to the
            residuals in the price-history facet.

    Returns:
        A DataFrame holding the five facets side by side, restricted to the
        index labels shared by all of them.
    """
    W = kwargs.get('W', 10)
    concat_opts = {
        'axis': 'columns',
        'verify_integrity': True,
        'sort': True,
        'join': 'inner',
    }
    ta = get_ta_features(ohlcv, TA_CONFIG)

    # Price history facet: residuals of OHLC plus W lagged copies.
    # The frame starts without an index, so it adopts whatever index
    # get_residual returns for the first column.
    residuals = pd.DataFrame()
    residuals['open_resid'] = get_residual(ohlcv.open)
    residuals['high_resid'] = get_residual(ohlcv.high)
    residuals['low_resid'] = get_residual(ohlcv.low)
    residuals['close_resid'] = get_residual(ohlcv.close)
    history_facet = pd.concat(
        [residuals] + [make_lagged(residuals, i) for i in range(1, W + 1)],
        **concat_opts)

    # Price trend facet (REMA/RSMA, MACD, AO, ADX, WD+ - WD-)
    trend_facet = ta[[
        "rsma_5_20", "rsma_8_15", "rsma_20_50", "rema_5_20", "rema_8_15",
        "rema_20_50", "macd_12_26", "ao_14", "adx_14", "wd_14"
    ]]
    # Volatility facet (CMO, ATRp)
    volatility_facet = ta[["cmo_14", "atrp_14"]]
    # Volume facet (Volume pct, PVO, ADI, OBV). Infinite volume changes are
    # replaced with 0.
    volume_pct = ohlcv.volume.pct_change().replace([np.inf, -np.inf], 0)
    volume_facet = pd.concat([volume_pct, ta[["pvo_12_26", "adi", "obv"]]],
                             **concat_opts)
    # On-chain facet: percent change for most metrics, raw value for
    # isscontpctann
    cm_1 = coinmetrics.reindex(columns=[
        'adractcnt', 'txtfrvaladjntv', 'isstotntv',
        'feetotntv', 'splycur', 'hashrate',
        'difficulty', 'txtfrcount'])\
        .pct_change()
    cm_2 = coinmetrics.reindex(columns=['isscontpctann'])
    chain_facet = pd.concat([cm_1, cm_2], **concat_opts)

    # Drop, from each facet, the columns whose values are all NaN or +/-inf.
    # This is done with an explicit mask instead of the deprecated
    # 'mode.use_inf_as_na' option; surviving columns keep their original
    # values (including any inf).
    # Facet order matches the feature groups: price_history, trend,
    # volatility, volume, chain.
    infinities = [np.inf, -np.inf]
    cleaned = []
    for facet in (history_facet, trend_facet, volatility_facet, volume_facet,
                  chain_facet):
        unusable = facet.isna() | facet.isin(infinities)
        # Positional boolean mask, so selection does not depend on labels.
        keep = ~unusable.all().to_numpy(dtype=bool)
        cleaned.append(facet.loc[:, keep])

    return pd.concat(cleaned, **concat_opts)