def test_nan_insertion(self):
    '''when the external data source returns a subset of the requested
    dates NaNs are inserted into the database for the dates not
    returned.
    '''
    connection = SQLiteTimeseries.connect(':memory:')
    driver = SQLiteTimeseries(connection=connection,
                              table='price',
                              metric='Adj Close')
    symbol = 'MSFT'
    missing_date = datetime.datetime(2012, 12, 4, tzinfo=pytz.UTC)
    missing_dates = {missing_date}
    returned_dates = {
        datetime.datetime(2012, 12, 1, tzinfo=pytz.UTC),
        datetime.datetime(2012, 12, 2, tzinfo=pytz.UTC),
        datetime.datetime(2012, 12, 3, tzinfo=pytz.UTC),
    }
    requested_dates = missing_dates | returned_dates
    # The fake data source yields prices only for returned_dates, so
    # missing_date must be back-filled with NaN by the cache.
    mock_yahoo = mock.Mock()
    mock_yahoo.return_value = ((date, 10.) for date in returned_dates)
    cache = FinancialDataTimeSeriesCache(gets_data=mock_yahoo,
                                         database=driver)
    cached_values = list(
        cache.get(symbol=symbol, dates=list(requested_dates)))
    # Use a parameterized query (not string interpolation) and the
    # builtin next() instead of the Python-2-only cursor.next().
    db_val = next(connection.execute(
        'SELECT value FROM price WHERE date = ?', (str(missing_date),)))
    self.assertEqual(db_val['value'], 'NaN')
    cache_value_dict = {date: value for date, value in cached_values}
    assert np.isnan(cache_value_dict[missing_date])
def test_sqlite(self):
    '''Run the shared load-from-cache scenarios against fresh
    in-memory sqlite-backed price caches.
    '''
    def fresh_cache():
        # Each scenario gets its own empty cache so they cannot
        # interfere with one another.
        return FinancialDataTimeSeriesCache.build_sqlite_price_cache(
            sqlite_file_path=':memory:', table='price', metric='Adj Close')

    self.run_load_from_cache_yahoo(cache=fresh_cache())
    self.run_load_from_cache_multiple_tickers(cache=fresh_cache())
def test_load_from_cache(self):
    '''load_from_cache turns the (date, value) pairs produced by
    get() into a DataFrame column keyed by symbol.
    '''
    cache = FinancialDataTimeSeriesCache(gets_data=None, database=None)
    test_date = datetime.datetime(2012, 12, 3, tzinfo=pytz.UTC)
    test_price = 100
    canned_rows = [
        (test_date, test_price),
        (datetime.datetime(2012, 12, 4, tzinfo=pytz.UTC), 101),
        (datetime.datetime(2012, 12, 5, tzinfo=pytz.UTC), 102),
    ]
    # Stub out get() so no external data source is touched.
    cache.get = lambda *args, **kwargs: canned_rows
    symbol = 'ABC'
    df = cache.load_from_cache(
        start=datetime.datetime(2012, 11, 30, tzinfo=pytz.UTC),
        end=datetime.datetime(2013, 1, 1, tzinfo=pytz.UTC),
        stocks=[symbol])
    self.assertIsInstance(df, pd.DataFrame)
    self.assertIn(symbol, df.keys())
    self.assertEqual(df[symbol][test_date], test_price)
def sqlite_price_cache(db_file_path=DEFAULT_PRICE_PATH):
    '''Return a cache that persists prices downloaded from yahoo. '''
    return FinancialDataTimeSeriesCache.build_sqlite_price_cache(
        sqlite_file_path=db_file_path,
        table='prices',
        metric='Adj Close',
    )
def mongo_price_cache(mongo_host='localhost', mongo_port=27017):
    '''Build a yahoo-backed price cache that persists to the
    prices.prices mongo collection on the given host/port.
    '''
    collection = pymongo.MongoClient(mongo_host, mongo_port).prices.prices
    timeseries = MongoTimeseries(mongo_collection=collection, metric='price')
    return FinancialDataTimeSeriesCache(gets_data=get_prices_from_yahoo,
                                        database=timeseries)
def test_indexes(self):
    '''Index symbols (e.g. ^GSPC) load through the cache under the
    caller-supplied alias.
    '''
    cache = FinancialDataTimeSeriesCache.build_sqlite_price_cache(
        sqlite_file_path=':memory:', table='price', metric='Adj Close')
    start = datetime.datetime(2012, 12, 1, tzinfo=pytz.UTC)
    end = datetime.datetime(2012, 12, 31, tzinfo=pytz.UTC)
    df = cache.load_from_cache(indexes={'SPX': '^GSPC'},
                               start=start,
                               end=end)
    check_date = datetime.datetime(2012, 12, 3, tzinfo=pytz.UTC)
    self.assertEqual(df['SPX'][check_date], 1409.46)
def run_load_from_cache_multiple_tickers(self, cache):
    '''Load prices for several tickers through *cache* and check
    known adjusted closing values.

    BUG FIX: previously the passed-in cache was immediately shadowed
    by a freshly built sqlite cache, so the parameter was ignored and
    callers could not exercise other cache implementations.
    '''
    symbols = ['GOOG', 'AAPL']
    df = cache.load_from_cache(
        stocks=symbols,
        start=datetime.datetime(2012, 12, 1, tzinfo=pytz.UTC),
        end=datetime.datetime(2012, 12, 31, tzinfo=pytz.UTC))
    self.assertEqual(
        df['GOOG'][datetime.datetime(2012, 12, 3, tzinfo=pytz.UTC)],
        695.25)
    self.assertEqual(
        df['AAPL'][datetime.datetime(2012, 12, 31, tzinfo=pytz.UTC)],
        522.16)
def test_load_from_cache(self):
    '''A stubbed get() feeds load_from_cache, which must expose the
    values as a per-symbol DataFrame column.
    '''
    cache = FinancialDataTimeSeriesCache(gets_data=None, database=None)
    test_date = datetime.datetime(2012, 12, 3, tzinfo=pytz.UTC)
    test_price = 100

    def fake_get(*args, **kwargs):
        # Three consecutive trading days of canned prices.
        return [
            (test_date, test_price),
            (datetime.datetime(2012, 12, 4, tzinfo=pytz.UTC), 101),
            (datetime.datetime(2012, 12, 5, tzinfo=pytz.UTC), 102),
        ]

    cache.get = fake_get
    symbol = 'ABC'
    df = cache.load_from_cache(
        start=datetime.datetime(2012, 11, 30, tzinfo=pytz.UTC),
        end=datetime.datetime(2013, 1, 1, tzinfo=pytz.UTC),
        stocks=[symbol])
    self.assertIsInstance(df, pd.DataFrame)
    self.assertIn(symbol, df.keys())
    self.assertEqual(df[symbol][test_date], test_price)
def test_nan_insertion(self):
    '''when the external data source returns a subset of the requested
    dates NaNs are inserted into the database for the dates not
    returned.
    '''
    connection = SQLiteTimeseries.connect(':memory:')
    driver = SQLiteTimeseries(connection=connection,
                              table='price',
                              metric='Adj Close')
    symbol = 'MSFT'
    missing_date = datetime.datetime(2012, 12, 4, tzinfo=pytz.UTC)
    missing_dates = {missing_date}
    returned_dates = {
        datetime.datetime(2012, 12, 1, tzinfo=pytz.UTC),
        datetime.datetime(2012, 12, 2, tzinfo=pytz.UTC),
        datetime.datetime(2012, 12, 3, tzinfo=pytz.UTC),
    }
    requested_dates = missing_dates | returned_dates
    # Data source only knows about returned_dates; the cache must
    # back-fill missing_date with NaN.
    mock_yahoo = mock.Mock()
    mock_yahoo.return_value = ((date, 10.) for date in returned_dates)
    cache = FinancialDataTimeSeriesCache(gets_data=mock_yahoo,
                                         database=driver)
    cached_values = list(
        cache.get(symbol=symbol, dates=list(requested_dates)))
    # Parameterized query replaces str.format interpolation; builtin
    # next() replaces the Python-2-only cursor.next().
    db_val = next(connection.execute(
        'SELECT value FROM price WHERE date = ?', (str(missing_date),)))
    self.assertEqual(db_val['value'], 'NaN')
    cache_value_dict = {date: value for date, value in cached_values}
    assert np.isnan(cache_value_dict[missing_date])
def test_indexes(self):
    '''The S&P 500 index (^GSPC) is loadable through the cache and
    exposed under its alias column.
    '''
    cache = FinancialDataTimeSeriesCache.build_sqlite_price_cache(
        sqlite_file_path=':memory:', table='price', metric='Adj Close')
    df = cache.load_from_cache(
        indexes={'SPX': '^GSPC'},
        start=datetime.datetime(2012, 12, 1, tzinfo=pytz.UTC),
        end=datetime.datetime(2012, 12, 31, tzinfo=pytz.UTC))
    expected_date = datetime.datetime(2012, 12, 3, tzinfo=pytz.UTC)
    self.assertEqual(df['SPX'][expected_date], 1409.46)
def run_load_from_cache_multiple_tickers(self, cache):
    '''Load GOOG and AAPL through *cache* and verify known values.

    BUG FIX: the passed-in cache was previously shadowed by a freshly
    built sqlite cache on the first line, so whatever cache the caller
    supplied was never actually exercised.
    '''
    symbols = ['GOOG', 'AAPL']
    df = cache.load_from_cache(
        stocks=symbols,
        start=datetime.datetime(2012, 12, 1, tzinfo=pytz.UTC),
        end=datetime.datetime(2012, 12, 31, tzinfo=pytz.UTC))
    self.assertEqual(
        df['GOOG'][datetime.datetime(2012, 12, 3, tzinfo=pytz.UTC)],
        695.25)
    self.assertEqual(
        df['AAPL'][datetime.datetime(2012, 12, 31, tzinfo=pytz.UTC)],
        522.16)
def _build_mongo_cache(self):
    '''Return a yahoo-backed price cache persisting 'Adj Close'
    values to self.collection.
    '''
    timeseries_db = MongoTimeseries(mongo_collection=self.collection,
                                    metric='Adj Close')
    return FinancialDataTimeSeriesCache(
        gets_data=prices.get_prices_from_yahoo,
        database=timeseries_db)