class TestMFIIndicator(unittest.TestCase):
    """Compare the Money Flow Index with the values stored in the CSV fixture.

    The expected numbers live in the ``MFI`` column of the fixture file.
    Reference for the indicator:
    https://school.stockcharts.com/doku.php?id=technical_indicators:money_flow_index_mfi
    """

    _filename = 'ta/tests/data/cs-mfi.csv'

    def setUp(self):
        # Load the fixture and build the class-based indicator from it.
        frame = pd.read_csv(self._filename, sep=',')
        self._df = frame
        self._indicator = MFIIndicator(
            high=frame['High'],
            low=frame['Low'],
            close=frame['Close'],
            volume=frame['Volume'],
            n=14,
            fillna=False,
        )

    def tearDown(self):
        # Drop the fixture frame after each test.
        del self._df

    def _assert_tail_matches_fixture(self, computed):
        # Only the last rows are compared; series names are ignored.
        expected = self._df['MFI']
        pd.testing.assert_series_equal(
            expected.tail(), computed.tail(), check_names=False)

    def test_mfi(self):
        # Class-based API.
        self._assert_tail_matches_fixture(self._indicator.money_flow_index())

    def test_mfi2(self):
        # Function-style wrapper, called with the same inputs as setUp.
        frame = self._df
        computed = money_flow_index(
            high=frame['High'],
            low=frame['Low'],
            close=frame['Close'],
            volume=frame['Volume'],
            n=14,
            fillna=False,
        )
        self._assert_tail_matches_fixture(computed)
def add_volume_indicators(data: pd.DataFrame) -> pd.DataFrame:
    """Attach volume-based indicator columns to a price frame.

    Three columns -- ``chaikin``, ``mfi`` and ``obv`` -- are written onto
    ``data`` itself, and that same object is returned.

    Parameters
    ----------
    data : pd.DataFrame
        A dataframe with daily stock values. Must include: open, high, low,
        close and volume. It should also be sorted in a descending manner.

    Returns
    -------
    pd.DataFrame
        The input dataframe with the indicators added.
    """
    high, low, close, volume = (
        data[column] for column in ('high', 'low', 'close', 'volume')
    )

    # Every indicator object is constructed before the first column is
    # written; the bound methods are only called in the loop below.
    column_sources = (
        ('chaikin',
         ChaikinMoneyFlowIndicator(high, low, close, volume).chaikin_money_flow),
        ('mfi',
         MFIIndicator(high, low, close, volume).money_flow_index),
        ('obv',
         OnBalanceVolumeIndicator(close, volume).on_balance_volume),
    )
    for column, compute in column_sources:
        data.loc[:, column] = compute()

    return data
def applyIndicator(self, full_company_price):
    """Add technical-indicator feature columns to a price frame.

    The frame is stored on ``self.data`` and the new columns are written
    onto that same object, so the caller's frame is modified in place.

    :param full_company_price: frame providing ``'high'``, ``'low'``,
        ``'close'`` and ``'volume'`` columns.
    :return: ``self.data`` (the object passed in) with these columns added,
        in this order: ``ema12/30/60``, ``macd1226/2452``, ``roc12/30/60``,
        ``rsi14/28/60``, ``aroon25/50/80``, ``mfi14/28/80``,
        ``cci20/40/100``, ``willr14/28/60``, ``bband20up/down``,
        ``stoch14/28/60``, ``cmi20/40/100``, ``kc20up/down``.
    """
    self.data = full_company_price
    data = self.data
    high = data['high']
    low = data['low']
    close = data['close']
    volume = data['volume']

    # Distance of the close from each EMA, expressed as a fraction of the
    # close. The 30-period column is computed with a window of 30 so that
    # it matches its name (it was previously built with a window of 20).
    for window in (12, 30, 60):
        ema = EMAIndicator(close, window, fillna=False)
        data['ema{}'.format(window)] = (close - ema.ema_indicator()) / close

    # MACD line minus its signal line. The indicator takes its windows
    # positionally as (slow, fast, signal); the column name is fast+slow.
    for slow, fast, sign in ((26, 12, 9), (52, 24, 18)):
        macd = MACD(close, slow, fast, sign, fillna=False)
        data['macd{}{}'.format(fast, slow)] = macd.macd() - macd.macd_signal()

    # Indicators parameterised by a single window: (prefix, windows, builder).
    single_window = (
        ('roc', (12, 30, 60),
         lambda n: ROCIndicator(close, n, fillna=False).roc()),
        ('rsi', (14, 28, 60),
         lambda n: RSIIndicator(close, n, fillna=False).rsi()),
        ('aroon', (25, 50, 80),
         lambda n: AroonIndicator(close, n, fillna=False).aroon_indicator()),
        ('mfi', (14, 28, 80),
         lambda n: MFIIndicator(
             high, low, close, volume, n, fillna=False).money_flow_index()),
        ('cci', (20, 40, 100),
         lambda n: CCIIndicator(high, low, close, n, 0.015, fillna=False).cci()),
        ('willr', (14, 28, 60),
         lambda n: WilliamsRIndicator(high, low, close, n, fillna=False).wr()),
    )
    for prefix, windows, compute in single_window:
        for window in windows:
            data[prefix + str(window)] = compute(window)

    # Bollinger band distances, as a fraction of the close.
    bbands = BollingerBands(close, 20, 2, fillna=False)
    data['bband20up'] = (bbands.bollinger_hband() - close) / close
    data['bband20down'] = (close - bbands.bollinger_lband()) / close

    # Stochastic oscillator: (window, smoothing window) pairs.
    for window, smooth in ((14, 3), (28, 6), (60, 12)):
        stoch = StochasticOscillator(high, low, close, window, smooth, fillna=False)
        data['stoch{}'.format(window)] = stoch.stoch()

    for window in (20, 40, 100):
        cmf = ChaikinMoneyFlowIndicator(high, low, close, volume, window, fillna=False)
        data['cmi{}'.format(window)] = cmf.chaikin_money_flow()

    # Keltner channel distances, as a fraction of the close.
    keltner = KeltnerChannel(high, low, close, 20, 10, fillna=False)
    data['kc20up'] = (keltner.keltner_channel_hband() - close) / close
    data['kc20down'] = (close - keltner.keltner_channel_lband()) / close

    return self.data
def createDataset(self, symbol: str, startDate, endDate, useAllIndicators=True, isAugmenting=False, timePeriodForOutputs=24):
    """
    Creates a dataset and stores it in self.inputData / self.outputData.

    Please make sure that the start and end dates are the beginnings of
    days. Both are passed to pytz ``localize`` below.

    The method works in two passes:

    1. Walk from (startDate - DAYS_IN_AN_INPUT - 60 days) to endDate in
       steps of self._dataTimeInterval, collecting per-step close/volume
       means, the label percentiles of the output window that follows an
       input starting at that step, and the close/volume means used for
       normalization. All of these lists are index-aligned.
    2. Compute indicators over the collected means, then slice the lists
       into entries of self._numberOfSamples points each.

    :param symbol: e.g. "BTCUSDT"
    :param startDate: e.g. datetime(year=2020, month=1, day=1)
    :param endDate: e.g. datetime(year=2020, month=2, day=1)
    :param useAllIndicators: if False, only uses the minimum indicators
        (close, volume, rsi, ema, mfi).
    :param isAugmenting: used by createAugmentedDataset when augmenting.
        When True, the close and volume means are scaled by a sinusoidal
        factor with a random phase and random amplitude.
    :param timePeriodForOutputs: length, in hours, of the window the labels
        (percentiles) are computed over. If set to 24, this will generate
        the labels for the next 24 hours after the input period (the
        original documentation describes that period as 15 days; it is
        DAYS_IN_AN_INPUT days in the code).
    """
    # These are time-related variables.
    timezone = "Etc/GMT-0"
    timezone = pytz.timezone(timezone)
    # NOTE(review): outputStartDate is assigned but not used anywhere in
    # the visible part of this method.
    outputStartDate = startDate
    # We need to go back a little earlier to generate indicators such as RSI.
    startDate -= timedelta(days=DAYS_IN_AN_INPUT + 60)
    endDate = timezone.localize(endDate)
    startDate = timezone.localize(startDate)
    # outputStartDate = timezone.localize(outputStartDate)

    # We will be collecting our final features and labels in here:
    self.inputData = []
    self.outputData = []

    # This dataframe has all the raw data we need to generate the dataset.
    # The code below reads its "Timestamp", "Close" and "Volume" columns.
    df = self.dataObtainer.getHistoricalDataAsDataframe(symbol)

    # First, we will gather all of the means for our inputs...
    closeMeans = []
    volumeMeans = []

    # ... also, we will gather the outputs, which represent the
    # distributions of the next day prices.
    output15thPercentiles = []
    output25thPercentiles = []
    output35thPercentiles = []
    outputMedians = []
    output65thPercentiles = []
    output75thPercentiles = []
    output85thPercentiles = []

    # We will use this to normalize our outputs by dividing them by the
    # mean price of the last (latest/most recent) day in our input.
    priceMeansToDivideLabelsBy = []
    volumeMeansToDivideLabelsBy = []

    date = startDate

    # For augmentation:
    phaseShift = uniform(0, np.pi * 2)
    count = 0

    # Now we will be collecting the input prices, input volumes, and output
    # percentiles.
    #
    # Every path through one iteration appends exactly one value to each of
    # the eleven lists above, which is what keeps them index-aligned.
    #
    # NOTE(review): the gap-handling branches read `[-1]` from these lists.
    # If a gap is hit before the corresponding list has received its first
    # real value, that lookup raises IndexError.
    while date < endDate:
        print("Processing", date, "/", endDate)

        # First, we will collect the start and end dates for this input
        # point (which consists of 3 hours of data if that is our input
        # time interval). Then we calculate the mean price and volume for
        # this input data point.
        startIndex = df.index[df["Timestamp"] == date].tolist()
        # If this if condition is true, then we may be missing some data in
        # our dataset. I think this happens during times when Binance was
        # down. In this case, we just use the previous data.
        if len(startIndex) == 0:
            date += self._dataTimeInterval
            closeMeans.append(closeMeans[-1])
            volumeMeans.append(volumeMeans[-1])
            outputMedians.append(outputMedians[-1])
            output15thPercentiles.append(output15thPercentiles[-1])
            output25thPercentiles.append(output25thPercentiles[-1])
            output35thPercentiles.append(output35thPercentiles[-1])
            output65thPercentiles.append(output65thPercentiles[-1])
            output75thPercentiles.append(output75thPercentiles[-1])
            output85thPercentiles.append(output85thPercentiles[-1])
            priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
            volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
            continue
        startIndex = startIndex[0]

        # Same gap handling when the end of this input point is missing.
        endIndex = df.index[df["Timestamp"] == date + self._dataTimeInterval].tolist()
        if len(endIndex) == 0:
            date += self._dataTimeInterval
            closeMeans.append(closeMeans[-1])
            volumeMeans.append(volumeMeans[-1])
            outputMedians.append(outputMedians[-1])
            output15thPercentiles.append(output15thPercentiles[-1])
            output25thPercentiles.append(output25thPercentiles[-1])
            output35thPercentiles.append(output35thPercentiles[-1])
            output65thPercentiles.append(output65thPercentiles[-1])
            output75thPercentiles.append(output75thPercentiles[-1])
            output85thPercentiles.append(output85thPercentiles[-1])
            priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
            volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
            continue
        endIndex = endIndex[0]
        data = df.iloc[startIndex : endIndex]

        if isAugmenting:
            # One shared factor scales both the close and the volume mean.
            # `count` advances by a random step each point and wraps back
            # to 0 once it exceeds 2*pi.
            x = phaseShift + count
            augmentation = 1 + np.sin(x) * uniform(0.02, 0.04)
            closeMeans.append(data["Close"].mean() * augmentation)
            volumeMeans.append(data["Volume"].mean() * augmentation)
            count += uniform(0.3, 0.6)
            if count > 2 * np.pi:
                count = 0
        else:
            closeMeans.append(data["Close"].mean())
            volumeMeans.append(data["Volume"].mean())

        # Now we get the start and end dates for output data that would
        # be associated with an entry that begins at the data point found
        # above. Then we calculate the percentiles for the output.
        #
        # From here on closeMeans/volumeMeans already hold this step's
        # value, so the gap branches only pad the remaining lists.
        date2 = date + timedelta(days=DAYS_IN_AN_INPUT)
        startIndex = df.index[df["Timestamp"] == date2].tolist()
        if len(startIndex) == 0:
            date += self._dataTimeInterval
            outputMedians.append(outputMedians[-1])
            output15thPercentiles.append(output15thPercentiles[-1])
            output25thPercentiles.append(output25thPercentiles[-1])
            output35thPercentiles.append(output35thPercentiles[-1])
            output65thPercentiles.append(output65thPercentiles[-1])
            output75thPercentiles.append(output75thPercentiles[-1])
            output85thPercentiles.append(output85thPercentiles[-1])
            priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
            volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
            continue
        startIndex = startIndex[0]

        date2 += timedelta(hours=timePeriodForOutputs)
        endIndex = df.index[df["Timestamp"] == date2].tolist()
        if len(endIndex) == 0:
            date += self._dataTimeInterval
            outputMedians.append(outputMedians[-1])
            output15thPercentiles.append(output15thPercentiles[-1])
            output25thPercentiles.append(output25thPercentiles[-1])
            output35thPercentiles.append(output35thPercentiles[-1])
            output65thPercentiles.append(output65thPercentiles[-1])
            output75thPercentiles.append(output75thPercentiles[-1])
            output85thPercentiles.append(output85thPercentiles[-1])
            priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
            volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
            continue
        endIndex = endIndex[0]

        data = df.iloc[startIndex: endIndex]["Close"]
        outputMedians.append(data.median())
        output15thPercentiles.append(data.quantile(0.15))
        output25thPercentiles.append(data.quantile(0.25))
        output35thPercentiles.append(data.quantile(0.35))
        output65thPercentiles.append(data.quantile(0.65))
        output75thPercentiles.append(data.quantile(0.75))
        output85thPercentiles.append(data.quantile(0.85))

        # Lastly, we need to get the last input day's mean price, which we
        # use to normalize our output percentiles.
        # The window runs from (DAYS_IN_AN_INPUT - 1) days to
        # DAYS_IN_AN_INPUT days after `date`. Only the two normalization
        # lists remain to be filled at this point.
        date3 = date + timedelta(days=DAYS_IN_AN_INPUT - 1)
        startIndex = df.index[df["Timestamp"] == date3].tolist()
        if len(startIndex) == 0:
            date += self._dataTimeInterval
            priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
            volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
            continue
        startIndex = startIndex[0]

        date3 = date + timedelta(days=DAYS_IN_AN_INPUT)
        endIndex = df.index[df["Timestamp"] == date3].tolist()
        if len(endIndex) == 0:
            date += self._dataTimeInterval
            priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
            volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
            continue
        endIndex = endIndex[0]

        data = df.iloc[startIndex: endIndex]
        priceMeansToDivideLabelsBy.append(data["Close"].mean())
        volumeMeansToDivideLabelsBy.append(data["Volume"].mean())

        date += self._dataTimeInterval

    # Now that our while loop above collected data for inputs and
    # outputs, we need to generate technical indicators as additional
    # input features. We seem to be getting good performance if we only
    # use close, volume, rsi, ema and mfi, but we also have some other
    # indicators to play around with, such as ma and an additional rsi
    # with a different parameter.
    stock = StockDataFrame({
        "close": closeMeans,
        "volume": volumeMeans
    })

    # The standard RSI is 14 day. Note that if our time interval is 3 hrs,
    # there are 8 data points in a day. Thus, a 14 day RSI is a 112-RSI
    # because 14 * 8 = 112.
    rsis = (stock["rsi:112"] / 100).tolist()
    rsis2 = (stock["rsi:14"] / 100).tolist()
    emas = (stock["ema:21"]).tolist()
    macds = stock["macd:96,208"].tolist()
    macds2 = stock["macd:24,52"].tolist()
    bollUppers = stock["boll.upper:160"].tolist()
    bollLowers = stock["boll.lower:160"].tolist()

    # Only per-step close means are available here, so the close series is
    # passed for the high, low and close arguments alike.
    from ta.volume import MFIIndicator
    moneyFlowIndex = MFIIndicator(stock["close"], stock["close"], stock["close"], stock["volume"], window=14)
    mfis = (moneyFlowIndex.money_flow_index().divide(100)).to_list()

    # This gets rid of NANs in our indicators (just in case).
    import math
    rsis = [0 if math.isnan(x) else x for x in rsis]
    rsis2 = [0 if math.isnan(x) else x for x in rsis2]
    emas = [0 if math.isnan(x) else x for x in emas]
    macds = [0 if math.isnan(x) else x for x in macds]
    macds2 = [0 if math.isnan(x) else x for x in macds2]
    bollUppers = [0 if math.isnan(x) else x for x in bollUppers]
    bollLowers = [0 if math.isnan(x) else x for x in bollLowers]
    mfis = [0 if math.isnan(x) else x for x in mfis]

    # Now we will generate our final inputs and outputs! See the for loop
    # below.
    entryAmount = int((len(closeMeans) - self._numberOfSamples - 1))
    # Step a whole day at a time when self.dayByDay is set, otherwise one
    # data point at a time.
    if self.dayByDay:
        advanceAmount = self._datapointsPerDay
    else:
        advanceAmount = 1

    def fixWithin0And1(x):
        # Clamp x into the closed interval [0.0, 1.0].
        return min(max(x, 0.0), 1.0)

    # The first 60 days' worth of points are skipped -- presumably the
    # warm-up period that startDate was moved back by, so the indicators
    # have history behind them; confirm against the caller.
    for i in range(60 * self._datapointsPerDay, entryAmount, advanceAmount):
        print("Percent of entries created: " + str(i / entryAmount * 100) + "%")
        yesterdayCloseMean = priceMeansToDivideLabelsBy[i]
        yesterdayVolumeMean = volumeMeansToDivideLabelsBy[i]

        # This gets the input features and outputs for this dataset entry.
        # Slicing copies the lists, so the in-place clamping below does not
        # touch the full-length source lists.
        close = closeMeans[i : i + self._numberOfSamples]
        volume = volumeMeans[i : i + self._numberOfSamples]
        rsi = rsis[i : i + self._numberOfSamples]
        rsi2 = rsis2[i: i + self._numberOfSamples]
        ema = emas[i: i + self._numberOfSamples]
        macd = macds[i: i + self._numberOfSamples]
        macd2 = macds2[i: i + self._numberOfSamples]
        # Price-like features are divided by twice the last input day's
        # mean close and clamped to [0, 1]. The MACD values are shifted by
        # 0.5 so that zero maps to the middle of that range.
        ema = [fixWithin0And1(m / yesterdayCloseMean / 2) for m in ema]
        macd = [fixWithin0And1(m / yesterdayCloseMean / 2 + 0.5) for m in macd]
        macd2 = [fixWithin0And1(m / yesterdayCloseMean / 2 + 0.5) for m in macd2]
        mfi = mfis[i: i + self._numberOfSamples]
        bollUpper = bollUppers[i: i + self._numberOfSamples]
        bollUpper = [fixWithin0And1(m / yesterdayCloseMean / 2) for m in bollUpper]
        bollLower = bollLowers[i: i + self._numberOfSamples]
        bollLower = [fixWithin0And1(m / yesterdayCloseMean / 2) for m in bollLower]

        for j in range(len(close)):
            close[j] = fixWithin0And1(close[j] / yesterdayCloseMean / 2)

        for j in range(len(volume)):
            volume[j] = fixWithin0And1(volume[j] / yesterdayVolumeMean / 2)

        # Finally, we add the entry to the dataset.
        if useAllIndicators:
            self.inputData.append([close, volume, rsi, rsi2, ema, macd, macd2, bollUpper, bollLower, mfi])
        else:
            self.inputData.append([close, volume, rsi, ema, mfi])

        # This normalizes our data. 0.5 means that the percentile is the
        # same as the last day's mean. 1.0 means that the percentile is
        # twice the value of the last day's mean. We normalize in this way
        # so that we can use the sigmoid activation function for the
        # outputs. Unlike the inputs, these values are not clamped.
        output15thPercentile = output15thPercentiles[i] / yesterdayCloseMean / 2
        output25thPercentile = output25thPercentiles[i] / yesterdayCloseMean / 2
        output35thPercentile = output35thPercentiles[i] / yesterdayCloseMean / 2
        outputMedian = outputMedians[i] / yesterdayCloseMean / 2
        output65thPercentile = output65thPercentiles[i] / yesterdayCloseMean / 2
        output75thPercentile = output75thPercentiles[i] / yesterdayCloseMean / 2
        output85thPercentile = output85thPercentiles[i] / yesterdayCloseMean / 2
        self.outputData.append([
            output15thPercentile,
            output25thPercentile,
            output35thPercentile,
            outputMedian,
            output65thPercentile,
            output75thPercentile,
            output85thPercentile
        ])