Пример #1
0
def preProcessCalculation(df, limit):
    """Compute six indicator arrays from a price/volume table.

    All indicators are calculated on ``df.reset_index()`` and each result is
    wrapped in ``np.array`` before being returned.

    Args:
        df: table providing ``reset_index()`` and the columns 'close', 'high',
            'low' and 'volume' (presumably a pandas DataFrame - confirm
            against the caller).
        limit: period handed to the indicators; the money flow index is
            called with ``limit - 1``.

    Returns:
        tuple: ``(adx, mfi, atr, obv, rocr, mom)`` in that order.
    """
    frame = df.reset_index()

    # Average directional index over close/high/low.
    adx_raw = average_directional_index(
        frame['close'], frame['high'], frame['low'], limit)
    directional = np.array(adx_raw)

    # Money flow index; note the period is one shorter than `limit`.
    mfi_raw = money_flow_index(
        frame['close'], frame['high'], frame['low'], frame['volume'],
        limit - 1)
    money_flow = np.array(mfi_raw)

    # The remaining indicators only need the close (and volume) columns.
    true_range = np.array(average_true_range(frame['close'], limit))
    balance_volume = np.array(
        on_balance_volume(frame['close'], frame['volume']))
    change_rate = np.array(rate_of_change(frame['close'], limit))
    close_momentum = np.array(momentum(frame['close'], limit))

    return (directional, money_flow, true_range, balance_volume,
            change_rate, close_momentum)
Пример #2
0
def atr(dataframe, period, field='close') -> ndarray:
    """Return pyti's average true range for one column of ``dataframe``.

    Args:
        dataframe: container indexed by column name (``dataframe[field]``).
        period: period forwarded unchanged to pyti's ``average_true_range``.
        field: name of the column to read; defaults to 'close'.

    Returns:
        Whatever pyti's ``average_true_range`` returns for the selected
        column (annotated as ``ndarray``).
    """
    # Imported lazily so pyti is only required when this helper is called.
    from pyti.average_true_range import average_true_range as _pyti_atr

    prices = dataframe[field]
    return _pyti_atr(prices, period)
Пример #3
0
 def test_average_true_range_period_8(self):
     """ATR with period 8 must be numerically close to the stored expectation."""
     # Uses the fixtures self.close_data / self.atr_period_8_expected, which
     # are set up outside this method.
     result = average_true_range.average_true_range(self.close_data, 8)
     np.testing.assert_allclose(result, self.atr_period_8_expected)
Пример #4
0
 def test_average_true_range_invalid_period(self):
     """A period of 128 must be rejected with the 'data_len < period' error."""
     # Presumably 128 exceeds the length of the self.close_data fixture -
     # the fixture itself is defined outside this method.
     too_long = 128
     with self.assertRaises(Exception) as raised:
         average_true_range.average_true_range(self.close_data, too_long)
     self.assertEqual(str(raised.exception), "Error: data_len < period")
Пример #5
0
 def test_average_true_range_period_10(self):
     """ATR with period 10 must equal the stored expectation element-wise."""
     # Uses the fixtures self.close_data / self.atr_period_10_expected, which
     # are set up outside this method.
     result = average_true_range.average_true_range(self.close_data, 10)
     np.testing.assert_array_equal(result, self.atr_period_10_expected)
Пример #6
0
def add_features(asset_data, features=None, save_path=None, verbose=True):
    """Add feature columns to bar data.

    Each selected feature is written as a new column directly onto the
    DataFrame that was passed in (the input is modified in place, no copy is
    made). If no features are passed then all features are added.

    Available features: 'lower_bb', 'upper_bb', 'atr', 'volatility_12',
    'volatility_200', 'volatility', 'volume_change', 'bb_dist_upper',
    'bb_dist_lower', 'bb_range', 'change_4bar' and 'log_return'.

    The derived features 'volatility', 'bb_dist_upper', 'bb_dist_lower' and
    'bb_range' may be requested on their own: an input column they need that
    is neither selected nor already present in the data is computed
    temporarily and not stored.

    pyti is imported only when a pyti-backed feature (Bollinger bands, ATR)
    actually has to be calculated.

    Args:
        asset_data ((str, pd.DataFrame) or (str, pd.DataFrame)[]): a single
            tuple or a list of tuples (asset name, bar data). The bar data
            must contain the columns read by the selected features
            ("close", "high", "low", "volume").
        features (str[]): a list of features to include, all features if this
            is None
        save_path (str): path the bar data with features is written to as
            CSV. A placeholder {ASSET} is substituted with the asset name.
            Nothing is saved if this is None.
        verbose (bool): True if progress printing to console is desired

    Returns:
        (str, pd.DataFrame) or (str, pd.DataFrame)[]: the single
            (asset name, dataframe) tuple when exactly one asset was
            processed (this includes a one-element list), otherwise the list
            of (asset name, dataframe) tuples.

    Raises:
        ValueError: if asset_data is neither a tuple nor a list.
    """
    import numpy as np

    def replace_zero_with_min(series):
        # Zeros become the smallest positive value so a later log is defined.
        return series.replace(0, series.loc[series > 0].min())

    def wanted(name):
        # `is None` (not `== None`) so array-like selections do not trigger
        # an element-wise comparison.
        return features is None or name in features

    # NOTE(review): the two pyti calls below use different keyword names
    # (`std` vs `std_mult`), exactly as the original call sites did - confirm
    # against the installed pyti version before unifying them.
    def lower_band(data):
        # Lower Bollinger Band, 20 periods, 2 standard deviations.
        from pyti import bollinger_bands as bbands
        return bbands.lower_bollinger_band(data["close"], 20, std=2.0)

    def upper_band(data):
        # Upper Bollinger Band, 20 periods, 2 standard deviations.
        from pyti import bollinger_bands as bbands
        return bbands.upper_bollinger_band(data["close"], 20, std_mult=2.0)

    def range_std(data, window):
        # Rolling standard deviation of the bar's high-low range.
        return (data["high"] - data["low"]).rolling(window).std()

    def column_or(data, name, compute):
        # Prefer a stored column; otherwise compute the values on the fly
        # without adding them to the frame.
        if name in data:
            return data[name]
        return compute(data)

    # A single (asset, dataframe) tuple is handled as a one-item list.
    if isinstance(asset_data, tuple):
        asset_data = [asset_data]
    elif not isinstance(asset_data, list):
        raise ValueError(
            'asset_data must be an (asset name, pandas.DataFrame) tuple or a '
            'list of such tuples.'
        )

    feature_bars = []

    for asset, data in asset_data:

        if verbose:
            print("Calculating features for {0}".format(asset))

        if wanted('lower_bb'):
            data['lower_bb'] = lower_band(data)

        if wanted('upper_bb'):
            data['upper_bb'] = upper_band(data)

        # Ratio of the 24-period to the 200-period average true range.
        if wanted('atr'):
            from pyti import average_true_range as atr_module
            data["atr"] = atr_module.average_true_range(data["close"], 24) / \
                atr_module.average_true_range(data["close"], 200)

        # Standard deviation over 12 / 200 periods of the difference between
        # high and low of the bar.
        if wanted('volatility_12'):
            data["volatility_12"] = range_std(data, 12)

        if wanted('volatility_200'):
            data["volatility_200"] = range_std(data, 200)

        # Relative volatility of the last 12 bars over the last 200.
        if wanted('volatility'):
            short_vol = column_or(data, "volatility_12",
                                  lambda frame: range_std(frame, 12))
            long_vol = column_or(data, "volatility_200",
                                 lambda frame: range_std(frame, 200))
            data["volatility"] = short_vol / long_vol

        # Relative volume compared to the last 100 bars.
        if wanted('volume_change'):
            data["volume_change"] = data["volume"] / data["volume"].rolling(
                100).mean()

        # Resolve each band once for all derived features that need it.
        if wanted('bb_dist_upper') or wanted('bb_range'):
            upper = column_or(data, "upper_bb", upper_band)
        if wanted('bb_dist_lower') or wanted('bb_range'):
            lower = column_or(data, "lower_bb", lower_band)

        # Distance between the close price and the upper Bollinger band.
        if wanted('bb_dist_upper'):
            data["bb_dist_upper"] = upper - data["close"]

        # Distance between the close price and the lower Bollinger band.
        if wanted('bb_dist_lower'):
            data["bb_dist_lower"] = -(lower - data["close"])

        # Distance between the upper and lower bands relative to the close.
        if wanted('bb_range'):
            data["bb_range"] = (upper - lower) / data["close"]

        # The absolute value of the log return over the last 4 bars.
        if wanted('change_4bar'):
            data["change_4bar"] = np.abs(
                np.log(data["close"] / data["close"].shift(4)))

        # TODO: Augmented Dickey-Fuller statistic over a rolling 200-bar
        # window (statsmodels.tsa.stattools.adfuller), e.g.
        #     data["adf"] = data["close"].rolling(200).apply(
        #         lambda x: adfuller(x)[0], raw=False)

        # The log return over one bar.
        if wanted('log_return'):
            data["log_return"] = np.log(data["close"] / data["close"].shift(1))

        # TODO: make the log features below selectable through `features`.
        # They are switched off for now and therefore never added.
        log_features_enabled = False
        if log_features_enabled:
            data["volume_log"] = np.log(replace_zero_with_min(
                data["volume"])) / np.log(
                    replace_zero_with_min(data["volume"].rolling(200).mean()))
            data["atr_log"] = np.log(replace_zero_with_min(data["atr"]))
            data["volatility_log"] = np.log(
                replace_zero_with_min(data["volatility"]))
            data["change_4bar_log"] = np.log(
                replace_zero_with_min(data["change_4bar"]))

            # Moving averages over 12, 24 and 48 bars.
            for base in ("volume_log", "volatility_log", "change_4bar_log",
                         "log_return"):
                for window in (12, 24, 48):
                    column = "{0}_ma_{1}".format(base, window)
                    data[column] = data[base].rolling(window).mean()

        feature_bars.append((asset, data))

        if save_path is not None:
            if verbose:
                print("Saving {0}...".format(asset))
            save_location = save_path.replace("{ASSET}", asset)
            data.to_csv(save_location)

    # Exactly one processed asset is returned as a bare tuple, anything else
    # as the list of tuples.
    if len(feature_bars) == 1:
        return feature_bars[0]
    return feature_bars