def construct_vendor_md_request(self, md_request):
    """Create a copy of a MarketDataRequest using the vendor's own naming.

    The returned request carries the vendor's tickers and fields, while the
    caller's original tickers are preserved on `old_tickers` so results can
    be mapped back later.

    Parameters
    ----------
    md_request : MarketDataRequest
        Contains all the various parameters detailing time series start and
        finish, tickers etc.

    Returns
    -------
    MarketDataRequest
    """
    vendor_request = MarketDataRequest(md_request=md_request)

    vendor_request.tickers = self.translate_to_vendor_ticker(md_request)
    vendor_request.fields = self.translate_to_vendor_field(md_request)

    # Keep the original tickers so output columns can be renamed back
    vendor_request.old_tickers = md_request.tickers

    return vendor_request
def get_fx_cross_tick(self, start, end, cross, cut="NYC",
                      data_source="dukascopy",
                      cache_algo='internet_load_return', type='spot',
                      environment='backtest', fields=['bid', 'ask']):
    """Fetches tick FX spot data for one or more crosses.

    Always downloads bid/ask (and their volumes) from the tick provider;
    the `fields` parameter then controls which columns are returned. A
    requested 'close' field is synthesised as the bid/ask mid.

    Parameters
    ----------
    start, end : Datetime
        Start/finish of the data request
    cross : str or list(str)
        FX cross(es) to download
    cut : str
        Data cut (default NYC)
    data_source : str
        Tick data provider (default dukascopy)
    cache_algo : str
        Caching scheme for the data
    type : str
        Only 'spot' is supported
    environment : str
        Backtest or production environment
    fields : list(str)
        Fields to return, eg. ['bid', 'ask'] or ['close']

    Returns
    -------
    pandas.DataFrame
    """
    if isinstance(cross, str):
        cross = [cross]

    market_data_request = MarketDataRequest(
        gran_freq="tick",
        freq_mult=1,
        freq='tick',
        cut=cut,
        fields=['bid', 'ask', 'bidv', 'askv'],
        cache_algo=cache_algo,
        environment=environment,
        start_date=start,
        finish_date=end,
        data_source=data_source,
        category='fx')

    market_data_generator = self.market_data_generator

    data_frame_agg = None

    for cr in cross:
        if type == 'spot':
            market_data_request.tickers = cr

            cross_vals = market_data_generator.fetch_market_data(
                market_data_request)

            # If user only wants 'close' calculate that as the bid/ask mid.
            if fields == ['close']:
                # BUG FIX: mean(axis=1) returns a Series; assigning
                # `.columns` on a Series is a no-op attribute assignment,
                # so convert it back to a named DataFrame column instead
                cross_vals = cross_vals[
                    [cr + '.bid', cr + '.ask']].mean(axis=1).to_frame(
                    name=cr + '.close')
            else:
                # Renamed from `filter` to avoid shadowing the builtin
                filter_obj = Filter()

                filter_columns = [cr + '.' + f for f in fields]
                cross_vals = filter_obj.filter_time_series_by_columns(
                    filter_columns, cross_vals)

        if data_frame_agg is None:
            data_frame_agg = cross_vals
        else:
            data_frame_agg = data_frame_agg.join(cross_vals, how='outer')

    # Strip the NaN elements
    data_frame_agg = data_frame_agg.dropna()

    return data_frame_agg
def get_base_depos(self, start, end, currencies, tenor, cut="NYC",
                   data_source="bloomberg",
                   cache_algo="internet_load_return"):
    """Gets the deposit rates for a particular tenor and part of surface

    Parameters
    ----------
    start : Datetime
        Start date
    end : Datetime
        End date
    currencies : str or list(str)
        Currencies for which we want to download deposit rates
    tenor : str or list(str)
        Tenor of deposit rate
    cut : str
        Closing time of the market data
    data_source : str
        data_source of the market data eg. bloomberg
    cache_algo : str
        Caching scheme for the data

    Returns
    -------
    pandas.DataFrame
        Contains deposit rates
    """
    market_data_generator = self.market_data_generator

    if isinstance(currencies, str):
        currencies = [currencies]

    if isinstance(tenor, str):
        tenor = [tenor]

    # Tickers are currency + tenor, eg. USD1M
    tickers = [ccy + tn for ccy in currencies for tn in tenor]

    market_data_request = MarketDataRequest(
        start_date=start,
        finish_date=end,
        data_source=data_source,
        category='base-depos',
        freq='daily',
        cut=cut,
        tickers=tickers,
        fields=['close'],
        cache_algo=cache_algo,
        environment='backtest')

    data_frame = market_data_generator.fetch_market_data(
        market_data_request)
    data_frame.index.name = 'Date'

    return data_frame
def get_fx_forward_points(self, start, end, cross, tenor, cut="BGN",
                          source="bloomberg",
                          cache_algo="internet_load_return"):
    """Gets forward points for specified cross(es) and tenor(s).

    Bloomberg forward/depo tickers use 12M where vol tickers use 1Y, so
    1Y is mapped to 12M for the download and back again in the output.

    :param start: start date
    :param end: end date
    :param cross: asset(s) to be downloaded
    :param tenor: tenor(s) to download
    :param cut: closing time of data
    :param source: source of data eg. bloomberg
    :param cache_algo: caching scheme for the data
    :return: pandas.DataFrame of forward points
    """
    market_data_generator = self.market_data_generator

    # BUG FIX: removed dead code - a first MarketDataRequest was built and
    # had data_source/start_date/finish_date assigned, then the variable
    # was immediately rebound to a fresh request below, so those
    # assignments had no effect

    if isinstance(cross, str):
        cross = [cross]

    if isinstance(tenor, str):
        tenor = [tenor]

    # Bloomberg quotes the 1Y forward under a 12M ticker
    tenor = [x.replace('1Y', '12M') for x in tenor]

    tickers = []

    for cr in cross:
        for tn in tenor:
            tickers.append(cr + tn)

    market_data_request = MarketDataRequest(
        start_date=start,
        finish_date=end,
        data_source=source,
        category='fx-forwards',
        freq='daily',
        cut=cut,
        tickers=tickers,
        fields=['close'],
        cache_algo=cache_algo,
        environment='backtest')

    data_frame = market_data_generator.fetch_market_data(
        market_data_request)

    # Map the 12M tickers back to the user-facing 1Y convention
    data_frame.columns = [
        x.replace('12M', '1Y') for x in data_frame.columns]
    data_frame.index.name = 'Date'

    return data_frame
def get_fx_implied_vol(self, start, end, cross, tenor, cut="BGN",
                       data_source="bloomberg", part="V",
                       cache_algo="internet_load_return"):
    """Get implied vol for specified cross, tenor and part of surface

    Parameters
    ----------
    start : Datetime
        start date of request
    end : Datetime
        end date of request
    cross : str
        FX cross
    tenor : str
        tenor of implied vol
    cut : str
        closing time of data
    data_source : str
        data_source of market data eg. bloomberg
    part : str
        part of vol surface eg. V for ATM implied vol, 25R 25 delta risk
        reversal

    Return
    ------
    pandas.DataFrame
    """
    market_data_generator = self.market_data_generator

    # Build the vol surface tickers (eg. EURUSDV1M) from the request
    tickers = self.get_labels(cross, part, tenor)

    market_data_request = MarketDataRequest(
        start_date=start,
        finish_date=end,
        data_source=data_source,
        category='fx-implied-vol',
        freq='daily',
        cut=cut,
        tickers=tickers,
        fields=['close'],
        cache_algo=cache_algo,
        environment='backtest')

    data_frame = market_data_generator.fetch_market_data(
        market_data_request)
    data_frame.index.name = 'Date'

    return data_frame
def get_fx_implied_vol(self, start, end, cross, tenor, cut="BGN",
                       source="bloomberg", part="V",
                       cache_algo="internet_load_return"):
    """Get implied vol for specified cross, tenor and part of surface.

    :param start: start date
    :param end: end date
    :param cross: asset(s) to be downloaded
    :param tenor: tenor(s) to download
    :param cut: closing time of data
    :param source: source of data eg. bloomberg
    :param part: part of vol surface eg. V for ATM implied vol, 25R 25
        delta risk reversal
    :return: pandas.DataFrame of implied volatility
    """
    market_data_generator = self.market_data_generator

    if isinstance(cross, str):
        cross = [cross]

    if isinstance(tenor, str):
        tenor = [tenor]

    if isinstance(part, str):
        part = [part]

    # Tickers are cross + surface part + tenor, eg. EURUSDV1M
    tickers = [cr + pt + tn
               for cr in cross for tn in tenor for pt in part]

    market_data_request = MarketDataRequest(
        start_date=start,
        finish_date=end,
        data_source=source,
        category='fx-implied-vol',
        freq='daily',
        cut=cut,
        tickers=tickers,
        fields=['close'],
        cache_algo=cache_algo,
        environment='backtest')

    data_frame = market_data_generator.fetch_market_data(
        market_data_request)
    data_frame.index.name = 'Date'

    return data_frame
def get_base_depos(self, start, end, currencies, tenor, cut="NYC",
                   source="bloomberg", cache_algo="internet_load_return"):
    """Get deposit rates for the specified currencies and tenor(s).

    :param start: start date
    :param end: end date
    :param currencies: currencies for which to download deposit rates
    :param tenor: tenor(s) to download
    :param cut: closing time of data
    :param source: source of data eg. bloomberg
    :param cache_algo: caching scheme for the data
    :return: pandas.DataFrame of deposit rates
    """
    market_data_generator = self.market_data_generator

    if isinstance(currencies, str):
        currencies = [currencies]

    if isinstance(tenor, str):
        tenor = [tenor]

    # Tickers are currency + tenor, eg. USD1M
    tickers = [ccy + tn for ccy in currencies for tn in tenor]

    market_data_request = MarketDataRequest(
        start_date=start,
        finish_date=end,
        data_source=source,
        category='base-depos',
        freq='daily',
        cut=cut,
        tickers=tickers,
        fields=['close'],
        cache_algo=cache_algo,
        environment='backtest')

    data_frame = market_data_generator.fetch_market_data(
        market_data_request)
    data_frame.index.name = 'Date'

    return data_frame
def create_time_series_hash_key(self, market_data_request, ticker=None):
    """Creates a hash key for retrieving the time series

    Parameters
    ----------
    market_data_request : MarketDataRequest
        contains various properties describing time series to fetched,
        including ticker, start & finish date etc.
    ticker : str or list(str), optional
        Ticker to include in the key; only the first of a list is used

    Returns
    -------
    str
    """
    # Only the first ticker of a list contributes to the key
    if isinstance(ticker, list):
        ticker = ticker[0]

    category_key = MarketDataRequest().create_category_key(
        market_data_request, ticker)

    return self.create_cache_file_name(category_key)
def get_base_depos(self, start, end, currencies, tenor, cut="NYC",
                   data_source="bloomberg",
                   cache_algo="internet_load_return"):
    """Gets the deposit rates for a particular tenor and part of surface

    Parameters
    ----------
    start : DateTime
        Start date
    end : DateTime
        End date
    currencies : str or list(str)
        Currencies for which we want to download deposit rates
    tenor : str or list(str) or None
        Tenor of deposit rate (defaults to constants.base_depos_tenor)
    cut : str
        Closing time of the market data
    data_source : str
        data_source of the market data eg. bloomberg
    cache_algo : str
        Caching scheme for the data

    Returns
    -------
    pd.DataFrame
        Contains deposit rates
    """
    market_data_generator = self._market_data_generator

    if tenor is None:
        tenor = constants.base_depos_tenor

    if isinstance(currencies, str):
        currencies = [currencies]

    if isinstance(tenor, str):
        tenor = [tenor]

    tickers = [ccy + tn for ccy in currencies for tn in tenor]

    # Special case: always include the Fed Funds Effective Rate
    if 'USDFedEffectiveRate' not in tickers:
        tickers.append("USDFedEffectiveRate")

    # For depos there usually isn't a 10AM NYC cut available, so just use
    # TOK data
    if cut == '10AM':
        cut = 'TOK'

    market_data_request = MarketDataRequest(
        start_date=start,
        finish_date=end,
        data_source=data_source,
        category='base-depos',
        freq='daily',
        cut=cut,
        tickers=tickers,
        fields=['close'],
        cache_algo=cache_algo,
        environment='backtest')

    data_frame = market_data_generator.fetch_market_data(
        market_data_request)
    data_frame.index.name = 'Date'

    return data_frame
def get_fx_forward_points(self, start, end, cross, tenor, cut="BGN",
                          data_source="bloomberg",
                          cache_algo="internet_load_return"):
    """Gets the forward points for particular tenor(s) and cross(es).

    Bloomberg forward/depo tickers use 12M where vol tickers use 1Y, so
    1Y is mapped to 12M for the download and back again in the output.

    Parameters
    ----------
    start : Datetime
        Start date
    end : Datetime
        End date
    cross : str or list(str)
        FX crosses for which we want to download forward points
    tenor : str or list(str) or None
        Tenor of the forward points (defaults to
        constants.fx_forwards_tenor)
    cut : str
        Closing time of the market data
    data_source : str
        data_source of the market data eg. bloomberg
    cache_algo : str
        Caching scheme for the data

    Returns
    -------
    pd.DataFrame
        Contains forward points
    """
    # Removed commented-out dead code which pre-built an unused
    # MarketDataRequest; the real request is constructed below
    market_data_generator = self._market_data_generator

    if tenor is None:
        tenor = constants.fx_forwards_tenor

    if isinstance(cross, str):
        cross = [cross]

    if isinstance(tenor, str):
        tenor = [tenor]

    # Tickers are often different on Bloomberg for forwards/depos vs vol,
    # so for consistency 12M is always quoted to the user as 1Y
    tenor = [x.replace('1Y', '12M') for x in tenor]

    tickers = []

    for cr in cross:
        for tn in tenor:
            tickers.append(cr + tn)

    market_data_request = MarketDataRequest(
        start_date=start,
        finish_date=end,
        data_source=data_source,
        category='fx-forwards',
        freq='daily',
        cut=cut,
        tickers=tickers,
        fields=['close'],
        cache_algo=cache_algo,
        environment='backtest')

    data_frame = market_data_generator.fetch_market_data(
        market_data_request)

    # Map the 12M tickers back to the user-facing 1Y convention
    data_frame.columns = [
        x.replace('12M', '1Y') for x in data_frame.columns]
    data_frame.index.name = 'Date'

    return data_frame
def get_fx_cross(self, start, end, cross, cut="NYC",
                 data_source="bloomberg", freq="intraday",
                 cache_algo='internet_load_return', type='spot',
                 environment='backtest', fields=['close']):
    """Fetches FX spot data for one or more crosses.

    Tick-style sources (gain/dukascopy) or tick frequency are delegated
    to get_fx_cross_tick; otherwise a MarketDataRequest is built per
    cross and downloaded (serially or in a pool, depending on config).

    Parameters
    ----------
    start, end : Datetime
        Start/finish of the data request
    cross : str or list(str)
        FX cross(es) to download
    cut : str
        Data cut (default NYC)
    data_source : str
        Market data provider (default bloomberg)
    freq : str
        'intraday' or 'daily' (or 'tick', delegated)
    cache_algo : str
        Caching scheme for the data
    type : str
        Only 'spot' is supported
    environment : str
        Backtest or production environment
    fields : list(str)
        Fields to return

    Returns
    -------
    pandas.DataFrame
    """
    if data_source == "gain" or data_source == 'dukascopy' \
            or freq == 'tick':
        return self.get_fx_cross_tick(start, end, cross, cut=cut,
                                      data_source=data_source,
                                      cache_algo=cache_algo,
                                      type='spot', fields=fields)

    if isinstance(cross, str):
        cross = [cross]

    market_data_request_list = []

    for cr in cross:
        market_data_request = MarketDataRequest(
            freq_mult=1, cut=cut, fields=['close'], freq=freq,
            cache_algo=cache_algo, start_date=start, finish_date=end,
            data_source=data_source, environment=environment)

        market_data_request.type = type
        market_data_request.cross = cr

        if freq == 'intraday':
            market_data_request.gran_freq = "minute"  # intraday
        elif freq == 'daily':
            market_data_request.gran_freq = "daily"  # daily

        market_data_request_list.append(market_data_request)

    data_frame_agg = []

    # Depends on the nature of operation as to whether we should use
    # threading or multiprocessing library
    # BUG FIX: was `is "thread"` - identity comparison against a string
    # literal is implementation-dependent; use equality instead
    if constants.market_thread_technique == "thread":
        from multiprocessing.dummy import Pool
    else:
        # Most of the time is spent waiting for Bloomberg to return, so
        # can use threads rather than multiprocessing
        # must use the multiprocess library otherwise can't pickle
        # objects correctly
        # note: currently not very stable
        from multiprocess import Pool

    thread_no = constants.market_thread_no['other']

    if market_data_request_list[
            0].data_source in constants.market_thread_no:
        thread_no = constants.market_thread_no[
            market_data_request_list[0].data_source]

    # Fudge, issue with multithreading and accessing HDF5 files
    # - deliberately forces the serial path below
    thread_no = 0

    if (thread_no > 0):
        pool = Pool(thread_no)

        # Open the market data downloads in their own threads and return
        # the results
        df_list = pool.map_async(self._get_individual_fx_cross,
                                 market_data_request_list).get()

        data_frame_agg = self._calculations.iterative_outer_join(df_list)

        try:
            pool.close()
            pool.join()
        except:
            pass
    else:
        for md_request in market_data_request_list:
            data_frame_agg.append(
                self._get_individual_fx_cross(md_request))

        data_frame_agg = self._calculations.pandas_outer_join(
            data_frame_agg)

    # Strip the nan elements
    data_frame_agg = data_frame_agg.dropna(how='all')

    return data_frame_agg
def get_fx_implied_vol(self, start, end, cross, tenor, cut="BGN",
                       data_source="bloomberg", part="V",
                       cache_algo="internet_load_return",
                       environment='backtest'):
    """Get implied vol for specified cross, tenor and part of surface.
    By default we use Bloomberg, but we could use any data provider for
    which we have vol tickers.

    Note, that for Bloomberg not every point will be quoted for each
    dataset (typically, BGN will have more points than for example LDN)

    Parameters
    ----------
    start : datetime
        start date of request
    end : datetime
        end date of request
    cross : str
        FX cross
    tenor : str
        tenor of implied vol
    cut : str
        closing time of data
    data_source : str
        data_source of market data eg. bloomberg
    part : str
        part of vol surface eg. V for ATM implied vol, 25R 25 delta risk
        reversal

    Return
    ------
    pd.DataFrame
    """
    market_data_generator = self._market_data_generator

    if tenor is None:
        tenor = constants.fx_vol_tenor

    if part is None:
        part = constants.fx_vol_part

    tickers = self.get_labels(cross, part, tenor)

    market_data_request = MarketDataRequest(
        start_date=start,
        finish_date=end,
        data_source=data_source,
        category='fx-implied-vol',
        freq='daily',
        cut=cut,
        tickers=tickers,
        fields=['close'],
        cache_algo=cache_algo,
        environment=environment)

    data_frame = market_data_generator.fetch_market_data(
        market_data_request)

    # Special case for 10AM NYC cut
    # - get some historical 10AM NYC data (only available on BBG for a
    #   few years, before 2007)
    # - fill the rest with a weighted average of TOK/LDN closes
    if cut == "10AM":
        # Where we have actual 10am NY data use that & overwrite earlier
        # estimated data (next)
        vol_data_10am = data_frame

        # As for most dates we probably won't have 10am data, so drop
        # rows where there's no data at all
        # Can have the situation where some data won't be there
        # (eg. longer dated illiquid tenors)
        if vol_data_10am is not None:
            # Only have limited ON 10am cut data
            vol_data_10am = vol_data_10am.dropna(how='all')

        # Now get LDN and TOK vol data to fill any gaps
        vol_data_LDN = self.get_fx_implied_vol(start=start, end=end,
                                               cross=cross, tenor=tenor,
                                               data_source=data_source,
                                               cut='LDN', part=part,
                                               cache_algo=cache_algo)

        vol_data_TOK = self.get_fx_implied_vol(start=start, end=end,
                                               cross=cross, tenor=tenor,
                                               data_source=data_source,
                                               cut='TOK', part=part,
                                               cache_algo=cache_algo)

        old_cols = vol_data_LDN.columns

        vol_data_LDN.columns = vol_data_LDN.columns.values + "LDN"
        vol_data_TOK.columns = vol_data_TOK.columns.values + "TOK"

        data_frame = vol_data_LDN.join(vol_data_TOK, how='outer')

        # Create very naive average of LDN and TOK to estimate 10am NY
        # value because we often don't have this data
        # Note, this isn't perfect, particularly on days where you have
        # payrolls data, and we're looking at ON data
        for col in old_cols:
            data_frame[col] = (1 * data_frame[col + "LDN"]
                               + 3 * data_frame[col + "TOK"]) / 4

            data_frame.pop(col + "LDN")
            data_frame.pop(col + "TOK")

        # Get TOK/LDN vol data before 10am and after 10am (10am data is
        # only available for a few years)
        # If we have no original 10am data don't bother
        if vol_data_10am is not None:
            if not (vol_data_10am.empty):
                # Local import so this method doesn't rely on a
                # particular module-level pandas alias
                import pandas

                pre_vol_data = data_frame[
                    data_frame.index < vol_data_10am.index[0]]
                post_vol_data = data_frame[
                    data_frame.index > vol_data_10am.index[-1]]

                # BUG FIX: DataFrame.append was removed in pandas 2.0;
                # pandas.concat preserves the same row ordering
                data_frame = pandas.concat(
                    [pre_vol_data, vol_data_10am, post_vol_data])

    return data_frame
def fetch_single_time_series(self, md_request):
    """Fetches a time series for a single group of tickers, dropping any
    tickers which have already expired before the requested start date.

    Parameters
    ----------
    md_request : MarketDataRequest
        contains various properties describing time series to be fetched

    Returns
    -------
    pandas.DataFrame or None
    """
    md_request = MarketDataRequest(md_request=md_request)

    # Only includes those tickers have not expired yet!
    # BUG FIX: keep start_date as a pd.Timestamp (normalized to
    # midnight) - the original converted it to datetime.date, and
    # ordering comparisons between pd.Timestamp and datetime.date raise
    # a TypeError
    start_date = pd.Timestamp(md_request.start_date).normalize()
    current_date = pd.Timestamp(datetime.datetime.utcnow().date())

    tickers = md_request.tickers
    vendor_tickers = md_request.vendor_tickers

    expiry_date = pd.Timestamp(md_request.expiry_date)

    config = ConfigManager().get_instance()

    # In many cases no expiry is defined so skip them
    for i in range(0, len(tickers)):
        try:
            expiry_date = config.get_expiry_for_ticker(
                md_request.data_source, tickers[i])
        except:
            pass

        if expiry_date is not None:
            expiry_date = pd.Timestamp(expiry_date)

            if not (pd.isna(expiry_date)):
                # Use pandas Timestamp, a bit more robust with weird
                # dates (can fail if comparing date vs datetime)

                # If the expiry is before the start date of our download
                # don't bother downloading this ticker
                if expiry_date < start_date:
                    tickers[i] = None

                # Special case for futures-contracts which are intraday
                # avoid downloading if the expiry date is very far in
                # the past
                # (we need this before there might be odd situations
                # where we run on an expiry date, but still want to get
                # data right till expiry time)
                if md_request.category == "futures-contracts" \
                        and md_request.freq == "intraday" \
                        and self._days_expired_intraday_contract_download \
                        > 0:

                    if expiry_date + pd.Timedelta(
                            days=
                            self._days_expired_intraday_contract_download) \
                            < current_date:
                        tickers[i] = None

                if vendor_tickers is not None and tickers[i] is None:
                    vendor_tickers[i] = None

    md_request.tickers = [e for e in tickers if e != None]

    if vendor_tickers is not None:
        md_request.vendor_tickers = [e for e in vendor_tickers
                                     if e != None]

    df_single = None

    if len(md_request.tickers) > 0:
        df_single = self.get_data_vendor(
            md_request).load_ticker(md_request)

    if df_single is not None:
        if df_single.empty == False:
            df_single.index.name = "Date"

            # Will fail for DataFrames which includes dates/strings
            # eg. futures contract names
            df_single = Calculations().convert_to_numeric_dataframe(
                df_single)

        if md_request.freq == "second":
            # NOTE(review): resample() without an aggregation returns a
            # Resampler, not a DataFrame - confirm intended aggregation
            # (eg. .last()) with the caller
            df_single = df_single.resample("1s")

    return df_single
def download_daily(self, md_request):
    """Loads daily time series from specified data provider

    Parameters
    ----------
    md_request : MarketDataRequest
        contains various properties describing time series to fetched,
        including ticker, start & finish date etc.

    Returns
    -------
    pandas.DataFrame
    """
    key = MarketDataRequest().create_category_key(md_request=md_request)

    # Some categories are configured to bypass the multi-threaded path
    is_key_overriden = any(
        k in key
        for k in constants.override_multi_threading_for_categories)

    # By default use "other"; individual data sources may override
    thread_no = constants.market_thread_no["other"]

    if str(md_request.data_source) in constants.market_thread_no:
        thread_no = constants.market_thread_no[md_request.data_source]

    source_str = str(md_request.data_source)
    is_flat_file = any(ext in source_str
                       for ext in (".csv", ".h5", ".parquet", ".zip"))

    # Daily data does not include ticker in the key, as multiple tickers
    # in the same file; flat files and data engines are fetched in one go
    if thread_no == 1 or is_flat_file \
            or md_request.data_engine is not None:
        df_agg = self.fetch_single_time_series(md_request)
    else:
        # Keep at least one ticker per group (a small ticker count with
        # many threads would otherwise give a zero group size)
        group_size = max(
            int(len(md_request.tickers) / thread_no - 1), 1)

        md_request_list = []

        # Split up tickers into groups related to number of threads
        for i in range(0, len(md_request.tickers), group_size):
            md_request_single = copy.copy(md_request)
            md_request_single.tickers = \
                md_request.tickers[i:i + group_size]

            if md_request.vendor_tickers is not None:
                md_request_single.vendor_tickers = \
                    md_request.vendor_tickers[i:i + group_size]

            md_request_list.append(md_request_single)

        if is_key_overriden:
            # Special case where we make smaller calls one after the
            # other
            df_list = [self.fetch_single_time_series(md)
                       for md in md_request_list]

            df_agg = self._calculations.join(df_list, how="outer")
        else:
            df_agg = self.fetch_group_time_series(md_request_list)

    return df_agg
def load_ticker(self, md_request):
    """Retrieves market data from external data source (in this case
    Bloomberg)

    Daily-style frequencies are downloaded via HistoricalDataRequest
    (BDH), except for fields configured in constants.bbg_ref_fields,
    which go through ReferenceDataRequest (BDP). Tick/intraday
    frequencies use a single ticker only.

    Parameters
    ----------
    md_request : MarketDataRequest
        contains all the various parameters detailing time series start
        and finish, tickers etc

    Returns
    -------
    DataFrame
    """
    constants = DataConstants()

    # Copy the request and build its vendor-ticker equivalent
    md_request = MarketDataRequest(md_request=md_request)
    md_request_vendor = self.construct_vendor_md_request(md_request)

    data_frame = None

    logger = LoggerManager().getLogger(__name__)
    logger.info("Request Bloomberg data")

    # Do we need daily or intraday data?
    if (md_request.freq in ['daily', 'weekly', 'monthly', 'quarterly',
                            'yearly']):

        # Work out the fields which need to be downloaded via Bloomberg
        # ref request (BDP) and those that can be downloaded via
        # Historical request (BDH)
        ref_fields = []
        ref_vendor_fields = []

        # Get user defined list of BBG fields/vendor fields which need
        # to be downloaded by BDP
        bbg_ref_fields = list(constants.bbg_ref_fields.keys())
        bbg_ref_vendor_fields = list(constants.bbg_ref_fields.values())

        # A field is a "ref" field if either its internal or vendor name
        # appears in the configured BDP list
        for i in range(0, len(md_request.fields)):
            if md_request.fields[i] in bbg_ref_fields \
                    or md_request_vendor.fields[
                        i] in bbg_ref_vendor_fields:
                ref_fields.append(md_request.fields[i])
                ref_vendor_fields.append(md_request_vendor.fields[i])

        # Everything else goes through the historical (BDH) request
        non_ref_fields = []
        non_ref_vendor_fields = []

        for i in range(0, len(md_request.fields)):
            if md_request.fields[i] not in bbg_ref_fields \
                    and md_request_vendor.fields[
                        i] not in bbg_ref_vendor_fields:
                non_ref_fields.append(md_request.fields[i])
                non_ref_vendor_fields.append(md_request_vendor.fields[i])

        # For certain cases, need to use ReferenceDataRequest
        # eg. for events times/dates, last tradeable date fields (when
        # specified)
        if len(ref_fields) > 0:

            # Careful: make sure you copy the market data request object
            # (when threading, altering that can cause concurrency
            # issues!) - the original fields are restored further below
            old_fields = copy.deepcopy(md_request.fields)
            old_vendor_fields = copy.deepcopy(md_request_vendor.fields)

            # Just select those reference fields to download via
            # reference request
            md_request.fields = ref_fields
            md_request.vendor_fields = ref_vendor_fields
            md_request_vendor = self.construct_vendor_md_request(
                md_request)

            datetime_data_frame = self.get_reference_data(
                md_request_vendor, md_request)

            # Download all the other event or non-ref fields
            # (uses HistoricalDataRequest to Bloomberg)
            # concatenate with date time fields
            if len(non_ref_fields) > 0:

                md_request.fields = non_ref_fields
                md_request.vendor_fields = non_ref_vendor_fields
                md_request_vendor = self.construct_vendor_md_request(
                    md_request)

                events_data_frame = self.get_daily_data(md_request,
                                                        md_request_vendor)

                # Move the historical data's index into an ordinary
                # column so both frames can be concatenated side by
                # side, then restore it as the index afterwards
                col = events_data_frame.index.name
                events_data_frame = events_data_frame.reset_index(
                    drop=False)

                data_frame = pd.concat(
                    [events_data_frame, datetime_data_frame], axis=1)
                temp = data_frame[col]
                del data_frame[col]
                data_frame.index = temp
            else:
                data_frame = datetime_data_frame

            # Restore the full field lists on the (copied) requests
            md_request.fields = copy.deepcopy(old_fields)
            md_request_vendor.fields = copy.deepcopy(old_vendor_fields)

        # For all other daily/monthly/quarter data, we can use
        # HistoricalDataRequest to Bloomberg
        else:
            data_frame = self.get_daily_data(md_request,
                                             md_request_vendor)

        # (removed commented-out post-processing which converted
        # 'release-dt' columns to dates and others to numeric)

    # Assume one ticker only for intraday data and use
    # IntradayDataRequest to Bloomberg
    if (md_request.freq in ['tick', 'intraday', 'second', 'minute',
                            'hourly']):
        md_request_vendor.tickers = \
            md_request_vendor.tickers[0]

        if md_request.freq in ['tick', 'second']:
            data_frame = self.download_tick(md_request_vendor)
        else:
            data_frame = self.download_intraday(md_request_vendor)

        if data_frame is not None:
            if data_frame.empty:
                try:
                    logger.info("No tickers returned for: "
                                + md_request_vendor.tickers)
                except:
                    pass

                return None

            cols = data_frame.columns.values

            import pytz

            # Localize naive timestamps to UTC; if already tz-aware,
            # tz_localize raises and we convert instead
            try:
                data_frame = data_frame.tz_localize(pytz.utc)
            except:
                data_frame = data_frame.tz_convert(pytz.utc)

            # Prefix columns with the (internal) ticker name
            cols = md_request.tickers[0] + "." + cols
            data_frame.columns = cols

    logger.info("Completed request from Bloomberg.")

    return data_frame
def fetch_single_time_series(self, market_data_request):
    """Fetches a time series for a single group of tickers, dropping any
    tickers which have already expired before the requested start date.

    Parameters
    ----------
    market_data_request : MarketDataRequest
        contains various properties describing time series to be fetched

    Returns
    -------
    pandas.DataFrame or None
    """
    market_data_request = MarketDataRequest(
        md_request=market_data_request)

    # Only includes those tickers have not expired yet!
    start_date = pandas.Timestamp(market_data_request.start_date).date()

    import datetime
    current_date = datetime.datetime.utcnow().date()

    from datetime import timedelta

    tickers = market_data_request.tickers
    vendor_tickers = market_data_request.vendor_tickers

    expiry_date = market_data_request.expiry_date

    config = ConfigManager().get_instance()

    # In many cases no expiry is defined so skip them
    for i in range(0, len(tickers)):
        try:
            expiry_date = config.get_expiry_for_ticker(
                market_data_request.data_source, tickers[i])
        except:
            pass

        if expiry_date is not None:
            # Use pandas Timestamp, a bit more robust with weird dates
            # (can fail if comparing date vs datetime)
            expiry_date = pandas.Timestamp(expiry_date)

            # BUG FIX: guard against NaT before calling .date()/comparing
            # (mirrors the isna check in the newer implementation)
            if not pandas.isna(expiry_date):
                expiry_date = expiry_date.date()

                # If the expiry is before the start date of our download
                # don't bother downloading this ticker
                if expiry_date < start_date:
                    tickers[i] = None

                # Special case for futures-contracts which are intraday
                # avoid downloading if the expiry date is very far in
                # the past
                # (we need this before there might be odd situations
                # where we run on an expiry date, but still want to get
                # data right till expiry time)
                if market_data_request.category == 'futures-contracts' \
                        and market_data_request.freq == 'intraday' \
                        and self.days_expired_intraday_contract_download > 0:

                    if expiry_date + timedelta(
                            days=self.days_expired_intraday_contract_download
                    ) < current_date:
                        tickers[i] = None

            if vendor_tickers is not None and tickers[i] is None:
                vendor_tickers[i] = None

    market_data_request.tickers = [e for e in tickers if e != None]

    if vendor_tickers is not None:
        market_data_request.vendor_tickers = [
            e for e in vendor_tickers if e != None]

    data_frame_single = None

    if len(market_data_request.tickers) > 0:
        data_frame_single = self.get_data_vendor(
            market_data_request.data_source).load_ticker(
            market_data_request)

    if data_frame_single is not None:
        if data_frame_single.empty == False:
            data_frame_single.index.name = 'Date'

            # Will fail for dataframes which includes dates/strings
            # (eg. futures contract names)
            try:
                data_frame_single = data_frame_single.astype('float32')
            except:
                self.logger.warning('Could not convert to float')

        if market_data_request.freq == "second":
            # NOTE(review): resample() without an aggregation returns a
            # Resampler, not a DataFrame - confirm intended aggregation
            # (eg. .last()) with the caller
            data_frame_single = data_frame_single.resample("1s")

    return data_frame_single
def download_daily(self, market_data_request):
    """Loads daily time series from specified data provider

    Parameters
    ----------
    market_data_request : MarketDataRequest
        contains various properties describing time series to fetched,
        including ticker, start & finish date etc.

    Returns
    -------
    pandas.DataFrame
    """
    key = MarketDataRequest().create_category_key(market_data_request)

    # Some categories are configured to bypass the multi-threaded path
    is_key_overriden = any(
        k in key
        for k in DataConstants().override_multi_threading_for_categories)

    # By default use "other"; individual data sources may override
    thread_no = DataConstants().market_thread_no['other']

    if market_data_request.data_source in DataConstants().market_thread_no:
        thread_no = DataConstants().market_thread_no[
            market_data_request.data_source]

    # Daily data does not include ticker in the key, as multiple tickers
    # in the same file
    if thread_no == 1:
        data_frame_agg = self.fetch_single_time_series(
            market_data_request)
    else:
        # Keep at least one ticker per group (a small ticker count with
        # many threads would otherwise give a zero group size)
        group_size = max(
            int(len(market_data_request.tickers) / thread_no - 1), 1)

        market_data_request_list = []

        # Split up tickers into groups related to number of threads
        for i in range(0, len(market_data_request.tickers), group_size):
            market_data_request_single = copy.copy(market_data_request)
            market_data_request_single.tickers = \
                market_data_request.tickers[i:i + group_size]

            if market_data_request.vendor_tickers is not None:
                market_data_request_single.vendor_tickers = \
                    market_data_request.vendor_tickers[i:i + group_size]

            market_data_request_list.append(market_data_request_single)

        if is_key_overriden:
            # Special case where we make smaller calls one after the
            # other
            data_frame_list = [self.fetch_single_time_series(md)
                               for md in market_data_request_list]

            data_frame_agg = self.calculations.pandas_outer_join(
                data_frame_list)
        else:
            data_frame_agg = self.fetch_group_time_series(
                market_data_request_list)

    return data_frame_agg