def generate_test_transform_treasuries_data():
    '''
    Regenerate the fixture files for the transform_treasuries_data test

    Downloads the treasuries CSV, keeps the first 10 and the last 10 rows as
    a sample, then writes out both the sample and the result of running
    data_loader.transform_treasuries_data over it
    '''
    url = data_loader.treasuries_config['TREASURIES_URL']
    rows = data_loader.download_csv(url)
    row_count = len(rows)

    # Sample both ends of the download rather than the whole file
    head = rows[:10]
    tail = rows[row_count - 10:]
    sample = head + tail

    # The input fixture is written before the transform is run
    utils.serialise_obj(
        sample, 'testdata/test_transform_treasuries_data.data.py')
    transformed = data_loader.transform_treasuries_data(sample)
    utils.serialise_obj(
        transformed, 'testdata/test_transform_treasuries_data.result.py')
def generate_test_transform_google_timeseries():
    '''
    Regenerate the fixture files for the transform_google_timeseries test

    Downloads one month of raw GOOG data (2013-10-11 to 2013-11-11) and
    writes out both the raw download and the result of running
    data_loader.transform_google_timeseries over it
    '''
    ticker = 'GOOG'
    window_start = datetime.datetime(2013, 10, 11)
    window_end = datetime.datetime(2013, 11, 11)

    raw = data_loader.download_google_timeseries_raw(
        ticker, window_start, window_end)

    # The input fixture is written before the transform is run
    utils.serialise_obj(
        raw, 'testdata/test_transform_google_timeseries.data.py')
    transformed = data_loader.transform_google_timeseries(raw, ticker)
    utils.serialise_obj(
        transformed, 'testdata/test_transform_google_timeseries.result.py')
def get_time_series(loader, loader_args):
    '''
    Interrogate the cache for the requested series
    If it doesn't exist, call the loader function from the data_loader module
    with the dictionary args and add it to the cache

    loader      -- name of a function on the data_loader module
    loader_args -- dictionary of keyword arguments passed to that function

    Returns the time series, either from the cache or freshly loaded

    Raises Exception if get_id gives no id for the request and get_db()
    returns None
    '''
    # Get the time series from the cache/db
    # For now we just get back from the cache based on whether the exact
    # same call has been made before
    # TODO add support to interrogate a db for whether the data can be found
    # in the cache
    series_id = get_id(loader, loader_args)
    ts = get_from_cache(series_id) if series_id is not None else None

    # NOTE(review): this is a truthiness test, so a cached value that is
    # falsy (e.g. empty) is treated as a miss and loaded again - confirm
    # that is intended
    if ts:
        return ts

    # The time series is not in the cache/db, so load it using the loader
    # function
    ts = getattr(data_loader, loader)(**loader_args)

    # Add the time series to the cache/db
    # Tricky logic here while we transition to mongo...
    # If the id is none, we're using the db but the series is not in
    # the db. Therefore create a record for it in the db and pass the
    # newly created id onto the file cache
    # There's definite scope here for synchronisation between the file
    # cache and db meta data to cause complications. Perhaps the time
    # series should be put in the db too...
    if series_id is None:
        db_client = get_db()
        if db_client is None:
            raise Exception('id not set in db mode {0}'.format(config.DB))
        series_id = db_client.insert(loader, loader_args)
        # If the db has added an object id to the caller's args, remove it,
        # for now. pop() with a default replaces the has_key()/del pair:
        # has_key() no longer exists on Python 3 dictionaries
        loader_args.pop(db.OBJECT_ID, None)

    utils.serialise_obj(ts, get_cache_filename(series_id))
    return ts
def test_serialise_deserialise_obj(self):
    '''
    This tests the round trip: serialise an object then deserialise it and
    check we get back what we started with
    It's intended as a test for the two functions:
        utils.serialise_obj
        utils.deserialise_obj
    '''
    data = {
        'a': [5.97, 2.97, 8.2502, 4],
        'b': 'hello',
        'c': True
    }
    fd, tmpfile = tempfile.mkstemp(suffix='.py')
    # mkstemp hands back an already-open OS-level descriptor that the caller
    # owns. Only the path is used below, so close it straight away: leaving
    # it open leaks a descriptor per run and stops os.remove from working on
    # Windows
    os.close(fd)
    try:
        utils.serialise_obj(data, tmpfile)
        result = utils.deserialise_obj(tmpfile)
    finally:
        # Always clean up the temporary file, even if the round trip raises
        os.remove(tmpfile)
    self.assertEqual(data, result)
METADATA: { 'symbol': symbol, 'start': start, 'end': end } } } ################################################################################ if __name__ == '__main__': logging.basicConfig(level='DEBUG') import utils # utils.serialise_obj( # download_yahoo_timeseries_raw( # 'IBM', # datetime.datetime(2012, 11, 11), # datetime.datetime(2013, 11, 11)), # 'cache/yahoo_data.py') utils.serialise_obj( download_google_timeseries_raw( 'GOOG', datetime.datetime(2012, 11, 11), datetime.datetime(2013, 11, 11)), 'cache/google_data.py') # utils.serialise_obj(download_treasuries(), 'cache/treasuries_data.py') ################################################################################