def fetch_group_time_series(self, time_series_request_list):
    """
    fetch_group_time_series - Downloads several time series requests concurrently and joins the results

    Each request is passed to ``self.fetch_single_time_series`` through a worker pool. Results which are
    None are discarded and the remainder are combined with ``TimeSeriesCalcs.pandas_outer_join``.

    Parameters
    ----------
    time_series_request_list : list
        requests to be fetched; the ``data_source`` of the first request selects the size of the pool

    Returns
    -------
    Joined result of ``pandas_outer_join`` (presumably a pandas.DataFrame), or None when there were no
    requests or none of them returned any data
    """
    # nothing to fetch: avoids indexing an empty list below and spinning up a pool for no work
    if not time_series_request_list:
        return None

    constants = Constants()

    # depends on the nature of operation as to whether we should use threading or multiprocessing library
    # (compare by value: identity comparison of strings with "is" is not reliable)
    if constants.time_series_factory_thread_technique == "thread":
        # most of the time is spent waiting for the data vendor to return, so threads are sufficient
        from multiprocessing.dummy import Pool
    else:
        # must use the multiprocessing_on_dill library otherwise can't pickle objects correctly
        # note: currently not very stable
        from multiprocessing_on_dill import Pool

    # pool size is configured per data source, with 'other' as the fallback entry
    thread_no_lookup = constants.time_series_factory_thread_no
    thread_no = thread_no_lookup['other']
    data_source = time_series_request_list[0].data_source

    if data_source in thread_no_lookup:
        thread_no = thread_no_lookup[data_source]

    pool = Pool(thread_no)

    try:
        # open the market data downloads in their own workers and wait for the results
        result = pool.map_async(self.fetch_single_time_series, time_series_request_list)
        data_frame_group = result.get()
    finally:
        # always release the workers, even if one of the downloads raised
        pool.close()
        pool.join()

    # collect together all the time series, ignoring requests which returned nothing
    data_frame_group = [data_frame for data_frame in data_frame_group if data_frame is not None]

    if not data_frame_group:
        return None

    return TimeSeriesCalcs().pandas_outer_join(data_frame_group)
def fetch_group_time_series(self, time_series_request_list):
    """
    fetch_group_time_series - Downloads several time series requests concurrently and joins the results

    Each request is passed to ``self.fetch_single_time_series`` through a worker pool. Results which are
    None are discarded and the remainder are combined with ``TimeSeriesCalcs.pandas_outer_join``.

    Parameters
    ----------
    time_series_request_list : list
        requests to be fetched; the ``data_source`` of the first request selects the size of the pool

    Returns
    -------
    Joined result of ``pandas_outer_join`` (presumably a pandas.DataFrame), or None when there were no
    requests or none of them returned any data
    """
    # nothing to fetch: avoids indexing an empty list below and spinning up a pool for no work
    if not time_series_request_list:
        return None

    constants = Constants()

    # depends on the nature of operation as to whether we should use threading or multiprocessing library
    # (compare by value: identity comparison of strings with "is" is not reliable)
    if constants.time_series_factory_thread_technique == "thread":
        # most of the time is spent waiting for the data vendor to return, so threads are sufficient
        from multiprocessing.dummy import Pool
    else:
        # must use the multiprocessing_on_dill library otherwise can't pickle objects correctly
        # note: currently not very stable
        from multiprocessing_on_dill import Pool

    # pool size is configured per data source, with 'other' as the fallback entry
    thread_no_lookup = constants.time_series_factory_thread_no
    thread_no = thread_no_lookup['other']
    data_source = time_series_request_list[0].data_source

    if data_source in thread_no_lookup:
        thread_no = thread_no_lookup[data_source]

    pool = Pool(thread_no)

    try:
        # open the market data downloads in their own workers and wait for the results
        result = pool.map_async(self.fetch_single_time_series, time_series_request_list)
        data_frame_group = result.get()
    finally:
        # always release the workers, even if one of the downloads raised
        pool.close()
        pool.join()

    # collect together all the time series, ignoring requests which returned nothing
    data_frame_group = [data_frame for data_frame in data_frame_group if data_frame is not None]

    if not data_frame_group:
        return None

    return TimeSeriesCalcs().pandas_outer_join(data_frame_group)
def download_intraday_tick(self, time_series_request, loader):
    """
    download_intraday_tick - Loads intraday time series from specified data provider

    The request is split into one request per ticker. These are either loaded one by one through
    ``loader`` (single ticker, or the 'other' thread count configured as 1) or handed to
    ``self.fetch_group_time_series``.

    Parameters
    ----------
    time_series_request : TimeSeriesRequest
        contains various properties describing time series to fetched, including ticker, start & finish date etc.

    loader : object
        its ``load_ticker`` method is called once per ticker in the single threaded path
        (it is not used when the work is delegated to ``fetch_group_time_series``)

    Returns
    -------
    pandas.DataFrame
        or None if no ticker returned any data
    """
    has_vendor_tickers = hasattr(time_series_request, 'vendor_tickers')

    # create a list of TimeSeriesRequests, one shallow copy per ticker
    time_series_request_list = []

    for ticker_cycle, ticker in enumerate(time_series_request.tickers):
        time_series_request_single = copy.copy(time_series_request)
        time_series_request_single.tickers = ticker

        if has_vendor_tickers:
            # vendor tickers are matched to tickers by position
            time_series_request_single.vendor_tickers = [time_series_request.vendor_tickers[ticker_cycle]]

        time_series_request_list.append(time_series_request_single)

    single_threaded = (len(time_series_request.tickers) == 1
                       or Constants().time_series_factory_thread_no['other'] == 1)

    if not single_threaded:
        return self.fetch_group_time_series(time_series_request_list)

    # single threaded version
    # handle intraday ticker calls separately one by one
    data_frame_group = []

    for time_series_request_single in time_series_request_list:
        data_frame_single = loader.load_ticker(time_series_request_single)

        # if the vendor doesn't provide any data, don't attempt to append
        if data_frame_single is None or data_frame_single.empty:
            continue

        data_frame_single.index.name = 'Date'

        # we downscale into float32, to avoid memory problems in Python (32 bit)
        # data is stored on disk as float32 anyway
        data_frame_group.append(data_frame_single.astype('float32'))

    # nothing was returned for any ticker, so there is nothing to join
    if not data_frame_group:
        return None

    # if you call for returning multiple tickers, be careful with memory considerations!
    return TimeSeriesCalcs().pandas_outer_join(data_frame_group)
def download_intraday_tick(self, time_series_request, loader):
    """
    download_intraday_tick - Loads intraday time series from specified data provider

    The request is split into one request per ticker. These are either loaded one by one through
    ``loader`` (single ticker, or the 'other' thread count configured as 1) or handed to
    ``self.fetch_group_time_series``.

    Parameters
    ----------
    time_series_request : TimeSeriesRequest
        contains various properties describing time series to fetched, including ticker, start & finish date etc.

    loader : object
        its ``load_ticker`` method is called once per ticker in the single threaded path
        (it is not used when the work is delegated to ``fetch_group_time_series``)

    Returns
    -------
    pandas.DataFrame
        or None if no ticker returned any data
    """
    has_vendor_tickers = hasattr(time_series_request, 'vendor_tickers')

    # create a list of TimeSeriesRequests, one shallow copy per ticker
    time_series_request_list = []

    for ticker_cycle, ticker in enumerate(time_series_request.tickers):
        time_series_request_single = copy.copy(time_series_request)
        time_series_request_single.tickers = ticker

        if has_vendor_tickers:
            # vendor tickers are matched to tickers by position
            time_series_request_single.vendor_tickers = [time_series_request.vendor_tickers[ticker_cycle]]

        time_series_request_list.append(time_series_request_single)

    single_threaded = (len(time_series_request.tickers) == 1
                       or Constants().time_series_factory_thread_no['other'] == 1)

    if not single_threaded:
        return self.fetch_group_time_series(time_series_request_list)

    # single threaded version
    # handle intraday ticker calls separately one by one
    data_frame_group = []

    for time_series_request_single in time_series_request_list:
        data_frame_single = loader.load_ticker(time_series_request_single)

        # if the vendor doesn't provide any data, don't attempt to append
        if data_frame_single is None or data_frame_single.empty:
            continue

        data_frame_single.index.name = 'Date'

        # we downscale into float32, to avoid memory problems in Python (32 bit)
        # data is stored on disk as float32 anyway
        data_frame_group.append(data_frame_single.astype('float32'))

    # nothing was returned for any ticker, so there is nothing to join
    if not data_frame_group:
        return None

    # if you call for returning multiple tickers, be careful with memory considerations!
    return TimeSeriesCalcs().pandas_outer_join(data_frame_group)