def test_fill_nan_data():
    """Check that fill_nan_data clears NaN placed at the start, middle and end.

    NaN values are written into three different columns of the loaded SPY
    frame, then the frame is passed to ``fu.fill_nan_data`` with default
    arguments and the filled result must contain no NaN.
    """
    f = 'stock_db/test/SPY.csv'
    df = fu.load_data_frame(f, datetime.date(2018, 1, 1), datetime.date(2018, 4, 1))
    # Test by adding some NaN
    df.iloc[0:10, 0] = np.nan  # beginning
    df.iloc[11:20, 1] = np.nan  # middle
    df.iloc[-10:, 2] = np.nan  # end
    assert df.isna().any().any()
    # The sibling not-in-place test expects the default call to leave `df`
    # untouched and hand back a new frame, so check the returned frame.
    # Fall back to `df` if the call returned None (in-place behaviour).
    result = fu.fill_nan_data(df)
    filled = df if result is None else result
    assert not filled.isna().any().any()
def test_load_data_frame():
    """Load the SPY test file and check every column accessor returns data.

    Each of the date/open/high/low/close/volume getters in ``fu`` must give
    a non-empty result for the loaded frame.
    """
    csv_path = 'stock_db/test/SPY.csv'
    first_day = datetime.date(2018, 1, 1)
    last_day = datetime.date(2018, 4, 1)
    frame = fu.load_data_frame(csv_path, first_day, last_day)

    accessors = (
        fu.get_date,
        fu.get_open,
        fu.get_high,
        fu.get_low,
        fu.get_close,
        fu.get_volume,
    )
    for accessor in accessors:
        assert len(accessor(frame)) > 0
def test_load_data_frame_no_adj():
    """Load the SPY test file with the fourth argument False and check columns.

    The frame must expose exactly as many columns as the expected list, and
    every expected column name must be present.
    NOTE(review): the positional ``False`` is presumably the price-adjustment
    switch -- confirm against ``fu.load_data_frame``.
    """
    csv_path = 'stock_db/test/SPY.csv'
    frame = fu.load_data_frame(
        csv_path,
        datetime.date(2018, 1, 1),
        datetime.date(2018, 4, 1),
        False,
    )
    actual = list(frame.columns)
    expected = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']

    assert len(actual) == len(expected)
    assert all(name in actual for name in expected)
def _main():
    """Manually exercise the ``fu`` helpers and print what they return.

    Walks through symbol-file parsing, symbol/filename conversion, data
    validation, data-frame loading, the column getters, normalisation,
    NaN filling and URL download, printing each result to stdout.
    """
    sf = '../stock_db/dj.txt'
    print("symbol file {} contains the following stocks: {}".format(sf, fu.get_symbols_from_file(sf)))

    d = '../stock_db/test'
    s = 'SPY'
    f = fu.symbol_to_filename(s, d)
    print("symbol {} with directory {} gives filename {}".format(s, d, f))
    print("filename {} gives symbol {}".format(f, fu.filename_to_symbol(f)))
    # Same conversion on the upper-cased filename.
    print("filename {} gives symbol {}".format(f.upper(), fu.filename_to_symbol(f.upper())))
    print("validate_symbol_data {} = {}".format(f, fu.validate_symbol_data(f)))
    print("directory {} contains the following stocks: {}".format(d, fu.get_all_symbols(d)))

    start_date = datetime.date(1900, 1, 1)
    end_date = datetime.date.today()
    # Deliberately disabled: flip the condition to run the download/update
    # calls by hand.
    if False:
        fu.download_data(s, d, start_date, end_date)
        fu.update_all_symbols(d, start_date, end_date)

    df = fu.load_data_frame(f, datetime.date(2018, 1, 1), datetime.date(2018, 4, 1))
    print(df.describe())
    print(df.head())
    print(fu.get_date(df)[0:3])
    print(fu.get_open(df)[0:3])
    print(fu.get_high(df)[0:3])
    print(fu.get_low(df)[0:3])
    print(fu.get_close(df)[0:3])
    print(fu.get_volume(df)[0:3])

    # Not applicable for a single stock, but just to test...
    print(fu.normalize_data_frame(df).head())

    # Test by adding some NaN
    df.iloc[0:10, 0] = np.nan  # beginning
    df.iloc[11:20, 1] = np.nan  # middle
    df.iloc[-10:, 2] = np.nan  # end
    print(df.isna().any())
    # The tests in this file treat the default fill_nan_data call as not
    # in-place, so report on the returned frame; fall back to `df` if the
    # call returned None (in-place behaviour).
    result = fu.fill_nan_data(df)
    filled = df if result is None else result
    print(filled.isna().any())

    url_array = [
        'https://www.google.ca',
        'https://www.bad234123421342134.com',
        'https://www.tmall.com',
        'https://tmxmoney.com/en/index.html',
    ]
    # NOTE(review): the second URL looks intentionally unresolvable; this
    # assumes download_url still returns something sliceable -- confirm.
    for u in url_array:
        print(fu.download_url(u)[:50])
def test_fill_nan_data_inplace():
    """With ``inplace=True`` the passed frame is filled and None is returned."""
    frame = fu.load_data_frame(
        'stock_db/test/SPY.csv',
        datetime.date(2018, 1, 1),
        datetime.date(2018, 4, 1),
    )

    # Punch NaN holes at the beginning, middle and end, one column each.
    nan_holes = (
        (slice(0, 10), 0),      # beginning
        (slice(11, 20), 1),     # middle
        (slice(-10, None), 2),  # end
    )
    for rows, column in nan_holes:
        frame.iloc[rows, column] = np.nan
    assert frame.isna().any().any()

    returned = fu.fill_nan_data(frame, inplace=True)

    # The original frame itself must now be NaN-free...
    assert not frame.isna().any().any()
    # ...and the in-place call hands back nothing.
    assert returned is None
def test_fill_nan_data_notinplace():
    """By default a filled frame is returned and the input frame is untouched."""
    frame = fu.load_data_frame(
        'stock_db/test/SPY.csv',
        datetime.date(2018, 1, 1),
        datetime.date(2018, 4, 1),
    )

    # Punch NaN holes at the beginning, middle and end, one column each.
    nan_holes = (
        (slice(0, 10), 0),      # beginning
        (slice(11, 20), 1),     # middle
        (slice(-10, None), 2),  # end
    )
    for rows, column in nan_holes:
        frame.iloc[rows, column] = np.nan
    assert frame.isna().any().any()

    filled = fu.fill_nan_data(frame)  # default = not inplace

    # The returned frame carries the fix...
    assert not filled.isna().any().any()
    # ...while the frame passed in still has its NaN values.
    assert frame.isna().any().any()
def get_symbol_data(self, symbol):
    """Return the DataFrame for one symbol, loading and caching it on first use."""
    # Anything already cached is assumed to be up to date, which avoids
    # repeated downloads.
    if symbol in self._dic:
        return self._dic[symbol]

    path = fu.symbol_to_filename(symbol, self._basedir)
    file_missing = not os.path.exists(path)
    if file_missing:
        self.download_data(symbol)

    frame = fu.load_data_frame(
        path,
        self._start_date,
        self._end_date,
        adjust_price=self._adjust_price,
    )
    if frame is None:
        print("ERROR: data for {} contains error".format(symbol))

    # Remember the result for next time (a failed load is cached as None too).
    self._dic[symbol] = frame
    return self._dic[symbol]
def test_normalize_data_frame():
    """Check that the first row of the normalized SPY frame averages to 1.

    The mean is a computed float, so it is compared with a tolerance rather
    than with exact equality.
    """
    import math  # local import: only this test needs it

    f = 'stock_db/test/SPY.csv'
    df = fu.load_data_frame(f, datetime.date(2018, 1, 1), datetime.date(2018, 4, 1))
    # Not applicable for a single stock, but just to test...
    first_row_mean = fu.normalize_data_frame(df).iloc[0].mean()
    assert math.isclose(first_row_mean, 1.0, rel_tol=1e-9)