示例#1
0
    def gen_features(self, data, natr_window, rsi_window, price=dcons.CLOSE):
        """Compute the NATR and RSI indicators for ``data``.

        A ta-lib configuration describing both indicators is assembled and
        handed, together with ``data``, to ``ta.apply_ta``. Nothing is
        returned by this method.

        Parameters
        ----------
        data: pandas dataframe
            Holds data/prices for a specific asset, expected to have OHLC.
        natr_window: integer
            The time window used for the NATR.
        rsi_window: integer
            The time window used for the RSI.
        price: str, optional(default=dcons.CLOSE)
            The column name forwarded to the RSI configuration.
            The NATR configuration does not receive a price column.
        """

        # per-indicator parameters understood by ta-lib
        natr_params = {"timeperiod": natr_window}
        rsi_params = {"timeperiod": rsi_window, "price": price}

        # each key is an indicator type, each tuple is (output name, parameters)
        ta_config = {
            "natr": [("natr", natr_params)],
            "rsi": [("rsi", rsi_params)],
        }

        # NOTE(review): apply_ta presumably writes the indicator columns
        # into `data` in place -- confirm against the ta module
        ta.apply_ta(data, ta_config)
示例#2
0
    def gen_features(self, data, lead, lead_t, lag, lag_t, price=dcons.CLOSE):
        """Compute the lead and lag moving averages for ``data``.

        The ta-lib configuration is keyed by moving average type. When the
        lead and the lag share the same type, both entries end up in one
        list under that single key; otherwise each type gets its own key.
        The configuration is then handed to ``ta.apply_ta``. Nothing is
        returned by this method.

        Parameters
        ----------
        data: pandas dataframe
            Holds data/prices for a specific asset, expected to have OHLC.
        lead: str, 'sma', 'ema'
            The type of moving average used for the lead moving average.
            -'sma': the simple moving average
            -'ema': exponential moving average
        lead_t: integer
            The time window used for the lead moving average.
        lag: str, 'sma', 'ema'
            The type of moving average used for the lag moving average.
            -'sma': the simple moving average
            -'ema': exponential moving average
        lag_t: integer
            The time window used for the lag moving average.
        price: str, optional(default=dcons.CLOSE)
            The column name the lead and lag moving averages are applied on.
        """

        # (moving average type, output name, time window), lead first
        ma_specs = (
            (lead, "lead_ma", lead_t),
            (lag, "lag_ma", lag_t),
        )

        # group the entries by type so a shared type keeps both entries
        # instead of the second overwriting the first
        ta_config = {}
        for ma_type, output_name, window in ma_specs:
            params = {"timeperiod": window, "price": price}
            ta_config.setdefault(ma_type, []).append((output_name, params))

        # apply moving averages to dataframe
        ta.apply_ta(data, ta_config)
    def gen_features(self, data, window, price=dcons.CLOSE):
        """Compute the William%R indicator for ``data``.

        William%R takes a value between 0 and -100. A ta-lib configuration
        with a single William%R entry is handed, together with ``data``,
        to ``ta.apply_ta``. Nothing is returned by this method.

        Parameters
        ----------
        data: pandas dataframe
            Holds data/prices for a specific asset, expected to have OHLC.
        window: integer
            The time window used for the William%R.
        price: str, optional(default=dcons.CLOSE)
            Accepted but not used by this method; the William%R
            configuration only receives the time window.
        """

        # the only parameter passed for William%R is its time window
        willr_params = {"timeperiod": window}

        # apply William%R to dataframe
        ta.apply_ta(data, {"willr": [("willr", willr_params)]})
    def gen_features(self,
                     data,
                     ema_lead,
                     ema_lag,
                     sma_lead,
                     sma_lag,
                     rsi_window,
                     willr_window,
                     natr_window,
                     price=dcons.CLOSE):
        """Compute all the combined technical indicators for ``data``.

        The indicators are a lead/lag EMA pair, a lead/lag SMA pair, RSI,
        William%R and NATR. One ta-lib configuration covering all of them
        is handed, together with ``data``, to ``ta.apply_ta``. Nothing is
        returned by this method.

        Parameters
        ----------
        data: pandas dataframe
            Holds data/prices for a specific asset, expected to have OHLC.
        ema_lead: integer
            The time window used for the EMA lead.
        ema_lag: integer
            The time window used for the EMA lag.
        sma_lead: integer
            The time window used for the SMA lead.
        sma_lag: integer
            The time window used for the SMA lag.
        rsi_window: integer
            The time window used for the RSI.
        willr_window: integer
            The time window used for William%R.
        natr_window: integer
            The time window used for NATR.
        price: str, optional(default=dcons.CLOSE)
            The column name forwarded to the EMA, SMA and RSI entries.
            The William%R and NATR entries do not receive it.
        """

        def _priced(window):
            # parameters for an indicator computed on the `price` column;
            # a fresh dict per entry, nothing is shared between entries
            return {"timeperiod": window, "price": price}

        # each key is an indicator type, each tuple is (output name, parameters)
        ta_config = {
            "ema": [
                ("lead_ema", _priced(ema_lead)),
                ("lag_ema", _priced(ema_lag)),
            ],
            "sma": [
                ("lead_sma", _priced(sma_lead)),
                ("lag_sma", _priced(sma_lag)),
            ],
            "rsi": [("rsi", _priced(rsi_window))],
            "willr": [("willr", {"timeperiod": willr_window})],
            "natr": [("natr", {"timeperiod": natr_window})],
        }

        # apply all configured indicators to dataframe
        ta.apply_ta(data, ta_config)
示例#5
0
    def gen_features(self,
                     data,
                     ema_lead,
                     ema_lag,
                     sma_lead,
                     sma_lag,
                     rsi_window,
                     willr_window,
                     natr_window,
                     lagged_features,
                     create_y=False,
                     price=dcons.CLOSE):
        """Generates lagged, normalised features for technical indicators.
           Which include EMA, SMA, RSI, William%R and NATR.

        ``data`` is modified in place: indicator columns, lagged columns and
        the reduced moving average columns are added, and every row that
        holds a NaN value is dropped afterwards.

        For RSI, William%R and NATR the indicator and its lagged columns are
        normalised row-wise (each value is divided by the sum of the
        indicator and its lags on that row) and used as features directly.
        For EMA and SMA the lead and the lag are normalised the same way and
        then reduced to their difference (lag - lead), once for the current
        value (``ema_0`` / ``sma_0``) and once per lag step
        (``ema_1`` ... ``ema_n`` / ``sma_1`` ... ``sma_n``).

        Parameters
        ----------
        data: pandas dataframe
            Holds data/prices for a specific asset, expected to have OHLC.
        ema_lead: integer
            The time window used for the EMA lead.
        ema_lag: integer
            The time window used for the EMA lag.
        sma_lead: integer
            The time window used for the SMA lead.
        sma_lag: integer
            The time window used for the SMA lag.
        rsi_window: integer
            The time window used for the RSI.
        willr_window: integer
            The time window used for William%R.
        natr_window: integer
            The time window used for NATR.
        lagged_features: integer
            Number of lag steps to be considered for every indicator.
        create_y: boolean
            If true creates the ``y`` column used for training: 1 when the
            next row's 'close' is higher than the current one, otherwise 0.
            The label always reads the 'close' column, regardless of
            ``price``.
        price: str, optional(default=dcons.CLOSE)
            The column name forwarded to the EMA, SMA and RSI entries.
            The William%R and NATR entries do not receive it.

        Returns
        -------
            list of str:
                A list of string containing the feature names.
        """

        def _priced(window):
            # parameters for an indicator computed on the `price` column
            return {"timeperiod": window, "price": price}

        def _lag_and_normalise(column):
            # create the lagged columns for `column`, then scale the column
            # and its lags so that they sum to 1 on every row;
            # returns the lagged column names followed by `column` itself
            cols = ut.lag_col(data, lagged_features, column)
            cols += [column]
            block = data[cols]
            data[cols] = block.div(block.sum(axis=1), axis=0)
            return cols

        # set config for ta-lib
        ta_config = {
            "ema": [("lead_ema", _priced(ema_lead)),
                    ("lag_ema", _priced(ema_lag))],
            "sma": [("lead_sma", _priced(sma_lead)),
                    ("lag_sma", _priced(sma_lag))],
            "rsi": [("rsi", _priced(rsi_window))],
            "willr": [("willr", {"timeperiod": willr_window})],
            "natr": [("natr", {"timeperiod": natr_window})]
        }

        # apply all configured indicators to dataframe
        # (the columns named in the config are expected to exist afterwards)
        ta.apply_ta(data, ta_config)

        # create a list for feature names
        feature_names = []

        # RSI, William%R and NATR: the normalised indicator and its lags
        # are the features
        for indicator in ("rsi", "willr", "natr"):
            feature_names += _lag_and_normalise(indicator)

        # moving averages: only the lead and lag with respect to each other
        # are of interest, so each normalised pair is reduced to lag - lead
        for ma in ("ema", "sma"):
            lead_col = "lead_{}".format(ma)
            lag_col = "lag_{}".format(ma)

            # lead first, then lag, to keep the column creation order
            _lag_and_normalise(lead_col)
            _lag_and_normalise(lag_col)

            # feature reduction on the current value
            current = "{}_0".format(ma)
            data[current] = data[lag_col] - data[lead_col]
            feature_names.append(current)

            # feature reduction on every lag step; this relies on ut.lag_col
            # naming its columns "<column>_lag_<step>" with steps from 1
            for step in range(1, lagged_features + 1):
                reduced = "{}_{}".format(ma, step)
                data[reduced] = (data["{}_lag_{}".format(lag_col, step)]
                                 - data["{}_lag_{}".format(lead_col, step)])
                feature_names.append(reduced)

        # drop nan values
        data.dropna(inplace=True)

        # if create_y is true we need to create the y variable
        # this will be used in training
        if create_y:
            # predict direction
            # NOTE(review): the last row has no next close, the comparison
            # against NaN is False so its label is always 0 -- confirm that
            # callers discard that row before training
            data["y"] = (data["close"] < data["close"].shift(-1)).astype(int)

        return feature_names