Пример #1
0
    def bus_day_of_month_seasonality(
        self,
        data_frame,
        month_list=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        cum=True,
        cal="FX",
        partition_by_month=True,
    ):
        """
        bus_day_of_month_seasonality - Builds a seasonality table from
        TimeSeriesCalcs.average_by_month_day_by_bus_day for the chosen months

        Parameters
        ----------
        data_frame : pandas.DataFrame
            time series to analyse (note: its index is converted to datetimes in place)
        month_list : list of int
            month labels selected from the averaged result (only read here, never modified)
        cum : bool
            when exactly True, a zero row is added and the result is passed through create_mult_index
        cal : str
            calendar code handed to the holiday filter and to the averaging helper
        partition_by_month : bool
            when truthy, the first index level is unstacked into columns

        Returns
        -------
        the (optionally unstacked / indexed) seasonality table
        """

        tsc = TimeSeriesCalcs()
        tsf = TimeSeriesFilter()

        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame = tsf.filter_time_series_by_holidays(data_frame, cal)

        monthly_seasonality = tsc.average_by_month_day_by_bus_day(data_frame, cal)
        monthly_seasonality = monthly_seasonality.loc[month_list]

        if partition_by_month:
            monthly_seasonality = monthly_seasonality.unstack(level=0)

        if cum is True:
            # label-based row insert; `.ix` no longer exists in current pandas, `.loc` is its
            # label-based replacement
            monthly_seasonality.loc[0] = numpy.zeros(len(monthly_seasonality.columns))

            if partition_by_month:
                monthly_seasonality.index = monthly_seasonality.index + 1  # shifting index
                # DataFrame.sort() was removed from pandas; sorting by index is sort_index()
                monthly_seasonality = monthly_seasonality.sort_index()

            monthly_seasonality = tsc.create_mult_index(monthly_seasonality)

        return monthly_seasonality
    def get_time_series_cached(self, time_series_request):
        """
        get_time_series_cached - Fetches a time series from the in-memory cache when it is there

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            describes the time series wanted (ticker, frequency, dates etc.)

        Returns
        -------
        pandas.DataFrame
            the cached frame passed through TimeSeriesFilter.filter_time_series, or None on a cache miss
        """

        # only intraday requests contribute their tickers to the hash key
        is_intraday = time_series_request.freq == "intraday"
        ticker = time_series_request.tickers if is_intraday else None

        cache_key = self.create_time_series_hash_key(time_series_request, ticker)

        if cache_key not in self._time_series_cache:
            return None

        cached_frame = self._time_series_cache[cache_key]

        return TimeSeriesFilter().filter_time_series(time_series_request, cached_frame)
Пример #3
0
    def get_surprise_against_intraday_moves_over_event(
            self,
            data_frame_cross_orig,
            cross,
            event_fx,
            event_name,
            start,
            end,
            offset_list=[1, 5, 30, 60],
            add_surprise=False,
            surprise_field='survey-average'):
        """
        get_surprise_against_intraday_moves_over_event - Fetches the release/survey fields for a named event,
        filters them with filter_time_series_by_date(start, end, ...) and hands everything to
        get_surprise_against_intraday_moves_over_custom_event

        Returns
        -------
        whatever get_surprise_against_intraday_moves_over_custom_event returns
        """

        date_filter = TimeSeriesFilter()

        # fields requested for every release of the event
        release_fields = [
            'actual-release',
            'survey-median',
            'survey-average',
            'survey-high',
            'survey-low',
        ]

        event_frame = date_filter.filter_time_series_by_date(
            start,
            end,
            self.get_economic_event_date_time_fields(release_fields, event_fx, event_name))

        options = dict(
            offset_list=offset_list,
            add_surprise=add_surprise,
            surprise_field=surprise_field)

        return self.get_surprise_against_intraday_moves_over_custom_event(
            data_frame_cross_orig, event_frame, cross, event_fx, event_name, start, end, **options)
Пример #4
0
    def get_intraday_moves_over_event(self,
                                      data_frame_rets,
                                      cross,
                                      event_fx,
                                      event_name,
                                      start,
                                      end,
                                      vol,
                                      mins=3 * 60,
                                      min_offset=0,
                                      create_index=False,
                                      resample=False,
                                      freq='minutes'):
        """
        get_intraday_moves_over_event - Looks up the release times of a named economic event, filters them
        with filter_time_series_by_date(start, end, ...) and delegates to get_intraday_moves_over_custom_event

        Returns
        -------
        whatever get_intraday_moves_over_custom_event returns
        """
        date_filter = TimeSeriesFilter()

        release_times = self.get_economic_event_date_time_dataframe(event_fx, event_name)
        release_times = date_filter.filter_time_series_by_date(start, end, release_times)

        # cross, start and end are not forwarded to the custom-event routine
        passthrough = dict(
            mins=mins,
            min_offset=min_offset,
            create_index=create_index,
            resample=resample,
            freq=freq)

        return self.get_intraday_moves_over_custom_event(
            data_frame_rets, release_times, vol, **passthrough)
Пример #5
0
    def get_time_series_cached(self, time_series_request):
        """
        get_time_series_cached - Returns a time series from the in-memory cache, if one is stored

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            properties of the time series to fetch (ticker, start & finish date etc.)

        Returns
        -------
        pandas.DataFrame
            filtered cached data, or None when the hash key is not in the cache
        """

        # tickers only take part in the hash key for intraday requests
        ticker = None

        if time_series_request.freq == "intraday":
            ticker = time_series_request.tickers

        hash_key = self.create_time_series_hash_key(time_series_request, ticker)
        cache = self._time_series_cache

        if hash_key in cache:
            stored = cache[hash_key]
            return TimeSeriesFilter().filter_time_series(time_series_request, stored)

        return None
Пример #6
0
    def get_economic_event_ret_over_custom_event_day(self, data_frame_in, event_dates, name, event, start, end, lagged = False,
                                              NYC_cutoff = 10):
        """
        get_economic_event_ret_over_custom_event_day - Picks the rows of a daily series that line up with
        event dates, optionally relabelling the series by one weekday first

        Parameters
        ----------
        data_frame_in : pandas.DataFrame
            daily data; a deep copy is taken so the caller's index is not changed
        event_dates : pandas.DataFrame
            frame whose index holds the event timestamps (presumably UTC - confirm against caller)
        name : str
            first part of the suffix appended to the column names
        event : str
            second part of the suffix appended to the column names
        start, end
            bounds passed to filter_time_series_by_date for the event dates
        lagged : bool
            True shifts the data index back one weekday when the average event hour (New York) is at or
            after NYC_cutoff; False shifts it forward one weekday when the average hour is before the cutoff
        NYC_cutoff : int
            hour of day compared with the average New York event hour

        Returns
        -------
        pandas.DataFrame
            one row per event date (NaN where the data has no such date), columns suffixed with
            '-<name> <event>'
        """

        time_series_filter = TimeSeriesFilter()
        event_dates = time_series_filter.filter_time_series_by_date(start, end, event_dates)

        data_frame = data_frame_in.copy(deep=True) # because we change the dates!

        time_series_tz = TimeSeriesTimezone()
        calendar = Calendar()

        bday = CustomBusinessDay(weekmask='Mon Tue Wed Thu Fri')

        event_dates_nyc = time_series_tz.convert_index_from_UTC_to_new_york_time(event_dates)
        average_hour_nyc = numpy.average(event_dates_nyc.index.hour)

        event_dates = calendar.floor_date(event_dates)

        # realised is traditionally on later day eg. 3rd Jan realised ON is 2nd-3rd Jan realised
        # so if Fed meeting is on 2nd Jan later, then we need realised labelled on 3rd (so minus a day)
        # implied expires on next day eg. 3rd Jan implied ON is 3rd-4th Jan implied

        # TODO smarter way of adjusting dates, as sometimes events can be before/after 10am NY cut
        if (lagged and average_hour_nyc >= NYC_cutoff):
            data_frame.index = data_frame.index - bday
        elif (not lagged and average_hour_nyc < NYC_cutoff): # ie. implied
            data_frame.index = data_frame.index + bday

        # set as New York time and select only those ON vols at the 10am NY cut just before the event
        # (`.ix` no longer exists in pandas; reindex keeps its "NaN row for a missing label" result)
        data_frame_events = data_frame.reindex(event_dates.index)
        data_frame_events.columns = data_frame.columns.values + '-' + name + ' ' + event

        return data_frame_events
Пример #7
0
    def harvest_time_series(self, time_series_request, kill_session = True):
        """
        harvest_time_series - Loads time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker, start & finish date etc.
            (its tickers attribute is filled in from the config when empty)

        kill_session : bool
            when truthy, the loader is asked to end its session after an 'internet_load' request

        Returns
        -------
        DataFrame
            None when cache_algo is 'cache_algo', when 'return' is not part of cache_algo, or when the final
            filtering step fails (the failure is logged)
        """

        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers
        create_tickers = (
            tickers is None
            or (isinstance(tickers, str) and tickers == '')
            or (isinstance(tickers, list) and tickers == [])
        )

        if create_tickers:
            # NOTE(review): this reads `.source` whereas the loader lookup above reads `.data_source` - confirm
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.source,
                time_series_request.freq, time_series_request.cut)

        # intraday or tick: only one ticker per cache file
        if time_series_request.freq in ['intraday', 'tick']:
            data_frame_agg = self.download_intraday_tick(time_series_request, loader)

        # daily: multiple tickers per cache file - assume we make one API call to vendor library
        else:
            data_frame_agg = self.download_daily(time_series_request, loader)

        if 'internet_load' in time_series_request.cache_algo:
            self.logger.debug("Internet loading.. ")

            # signal to loader template to exit session
            if loader is not None and kill_session:
                loader.kill_session()

        if time_series_request.cache_algo == 'cache_algo':
            self.logger.debug("Only caching data in memory, do not return any time series.")
            return None

        # only return time series if specified in the algo
        if 'return' not in time_series_request.cache_algo:
            return None

        # special case for events/events-dt which is not indexed like other tables
        if hasattr(time_series_request, 'category'):
            if 'events' in time_series_request.category:
                return data_frame_agg

        # the filter is only built once we know it is needed
        try:
            return TimeSeriesFilter().filter_time_series(time_series_request, data_frame_agg)
        except Exception as e:
            # still best-effort (None on failure), but the reason is no longer lost
            self.logger.error("Could not filter harvested time series, returning None: " + str(e))
            return None
Пример #8
0
    def bus_day_of_month_seasonality(self, data_frame,
                                 month_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], cum = True,
                                 cal = "FX", partition_by_month = True, add_average = False, price_index = False):
        """
        bus_day_of_month_seasonality - Builds a seasonality table from
        TimeSeriesCalcs.average_by_month_day_by_bus_day for the chosen months

        Parameters
        ----------
        data_frame : pandas.DataFrame
            returns (or prices when price_index is set); when price_index is falsy its index is converted
            to datetimes in place
        month_list : list of int
            month labels selected from the averaged result (only read here, never modified)
        cum : bool
            when exactly True, the result is passed through create_mult_index (with a zero row at label 0
            when partitioned by month)
        cal : str
            calendar code handed to the holiday filter and to the averaging helper
        partition_by_month : bool
            when truthy, the first index level is unstacked into columns
        add_average : bool
            when truthy (and partitioned), adds an 'Avg' column holding the row-wise mean
        price_index : bool
            when truthy, data_frame is resampled to business days and converted to returns first

        Returns
        -------
        the (optionally unstacked / indexed) seasonality table
        """

        tsc = TimeSeriesCalcs()
        tsf = TimeSeriesFilter()

        if price_index:
            # resample into business days; the aggregation must now be explicit in pandas
            # (mean was the implicit default when resample() returned a frame directly)
            data_frame = data_frame.resample('B').mean()
            data_frame = tsc.calculate_returns(data_frame)

        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame = tsf.filter_time_series_by_holidays(data_frame, cal)

        monthly_seasonality = tsc.average_by_month_day_by_bus_day(data_frame, cal)
        monthly_seasonality = monthly_seasonality.loc[month_list]

        if partition_by_month:
            monthly_seasonality = monthly_seasonality.unstack(level=0)

            if add_average:
               monthly_seasonality['Avg'] = monthly_seasonality.mean(axis=1)

        if cum is True:
            if partition_by_month:
                monthly_seasonality.loc[0] = numpy.zeros(len(monthly_seasonality.columns))
                # DataFrame.sort() was removed from pandas; sorting by index is sort_index()
                monthly_seasonality = monthly_seasonality.sort_index()

            monthly_seasonality = tsc.create_mult_index(monthly_seasonality)

        return monthly_seasonality
Пример #9
0
    def compare_strategy_vs_benchmark(self, br, strategy_df, benchmark_df):
        """
        compare_strategy_vs_benchmark - Compares the trading strategy we are backtesting against a benchmark

        Parameters
        ----------
        br : BacktestRequest
            Parameters for backtest such as start and finish dates; the optional attributes
            include_benchmark, calc_stats and portfolio_vol_adjust switch the individual steps on

        strategy_df : pandas.DataFrame
            Strategy time series

        benchmark_df : pandas.DataFrame
            Benchmark time series

        Returns
        -------
        pandas.DataFrame
            strategy_df untouched when br.include_benchmark is missing or falsy; otherwise the joined
            strategy/benchmark frame after date filtering and create_mult_index_from_prices.
            In the latter case self._benchmark_pnl and self._benchmark_tsd are also set.
        """

        include_benchmark = getattr(br, 'include_benchmark', False)
        calc_stats = getattr(br, 'calc_stats', False)

        if not include_benchmark:
            return strategy_df

        tsd = TimeSeriesDesc()
        cash_backtest = CashBacktest()
        ts_filter = TimeSeriesFilter()
        ts_calcs = TimeSeriesCalcs()

        # align strategy time series with that of benchmark
        strategy_df, benchmark_df = strategy_df.align(benchmark_df, join='left', axis = 0)

        # if necessary apply vol target to benchmark (to make it comparable with strategy)
        if getattr(br, 'portfolio_vol_adjust', False) is True:
            benchmark_df = cash_backtest.calculate_vol_adjusted_index_from_prices(benchmark_df, br = br)

        # only calculate return statistics if this has been specified (note when different frequencies of data
        # might underrepresent vol
        if calc_stats:
            # ffill() replaces the deprecated fillna(method='ffill') spelling
            benchmark_df = benchmark_df.ffill()
            tsd.calculate_ret_stats_from_prices(benchmark_df, br.ann_factor)
            benchmark_df.columns = tsd.summary()

        # realign strategy & benchmark
        strategy_benchmark_df = strategy_df.join(benchmark_df, how='inner')
        strategy_benchmark_df = strategy_benchmark_df.ffill()

        strategy_benchmark_df = ts_filter.filter_time_series_by_date(br.plot_start, br.finish_date, strategy_benchmark_df)
        strategy_benchmark_df = ts_calcs.create_mult_index_from_prices(strategy_benchmark_df)

        self._benchmark_pnl = benchmark_df
        self._benchmark_tsd = tsd

        return strategy_benchmark_df
    def get_intraday_moves_over_event(self, data_frame_rets, cross, event_fx, event_name, start, end, vol, mins = 3 * 60,
                                      min_offset = 0, create_index = False, resample = False, freq = 'minutes'):
        """
        get_intraday_moves_over_event - Fetches the release times for a named economic event, filters them with
        filter_time_series_by_date(start, end, ...) and passes them to get_intraday_moves_over_custom_event

        Returns
        -------
        whatever get_intraday_moves_over_custom_event returns
        """
        date_filter = TimeSeriesFilter()

        # release timestamps of the event, limited by the date filter
        event_times = date_filter.filter_time_series_by_date(
            start, end, self.get_economic_event_date_time_dataframe(event_fx, event_name))

        # note that cross, start and end are not passed on
        return self.get_intraday_moves_over_custom_event(
            data_frame_rets,
            event_times,
            vol,
            mins = mins,
            min_offset = min_offset,
            create_index = create_index,
            resample = resample,
            freq = freq)
Пример #11
0
    def __init__(self):
        """Create the config, logger and time series helpers, loading the economic events if none are held yet."""
        # NOTE(review): super() is anchored at EventStudy, so the lookup starts *after* EventStudy
        # in the MRO - confirm that skipping EventStudy.__init__ is intended
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()

        # the events frame lives on the class, so it is only loaded while still unset
        events_missing = LightEventsFactory._econ_data_frame is None

        if events_missing:
            self.load_economic_events()
    def get_surprise_against_intraday_moves_over_event(self, data_frame_cross_orig, cross, event_fx, event_name, start, end,
                                                       offset_list = [1, 5, 30, 60], add_surprise = False,
                                                       surprise_field = 'survey-average'):
        """
        get_surprise_against_intraday_moves_over_event - Collects the release/survey fields of a named event,
        filters them with filter_time_series_by_date(start, end, ...) and forwards to
        get_surprise_against_intraday_moves_over_custom_event

        Returns
        -------
        whatever get_surprise_against_intraday_moves_over_custom_event returns
        """

        date_filter = TimeSeriesFilter()

        # fields pulled for each release
        wanted_fields = ['actual-release', 'survey-median', 'survey-average', 'survey-high', 'survey-low']

        releases = self.get_economic_event_date_time_fields(wanted_fields, event_fx, event_name)
        releases = date_filter.filter_time_series_by_date(start, end, releases)

        positional = (data_frame_cross_orig, releases, cross, event_fx, event_name, start, end)

        return self.get_surprise_against_intraday_moves_over_custom_event(
            *positional,
            offset_list = offset_list,
            add_surprise = add_surprise,
            surprise_field = surprise_field)
Пример #13
0
    def __init__(self):
        """Create the logger and time series helpers and record the default Bloomberg API setting."""
        # the config manager is deliberately left disabled here:
        # self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)

        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()

        constants = Constants()
        self._bbg_default_api = constants.bbg_default_api

        self._intraday_code = -1
    def dump_indicators(self):
        """
        dump_indicators - Writes the indicator frames to their CSV destinations

        The full indicator frame is always written; the economy based and final frames are only written
        when their destination attribute is not None.
        """
        date_filter = TimeSeriesFilter()
        csv_date_format = '%d/%m/%Y'

        self.logger.info("About to write all indicators to CSV")
        self.indicator.to_csv(self._csv_indicator_dump, date_format=csv_date_format)

        if self._csv_econ_indicator_dump is not None:
            self.logger.info("About to write economy based indicators to CSV")
            self.indicator_econ.to_csv(self._csv_econ_indicator_dump, date_format=csv_date_format)

        # this message is logged even when no final dump destination is set
        self.logger.info("About to write final indicators to CSV")

        if self._csv_final_indicator_dump is None:
            return

        # remove weekends and remove start of series
        weekdays_only = date_filter.filter_time_series_by_holidays(self.indicator_final, cal = 'WEEKDAY')
        trimmed = date_filter.filter_time_series_by_date(
            start_date="01 Jan 2000", finish_date = None, data_frame=weekdays_only)

        trimmed.to_csv(self._csv_final_indicator_dump, date_format=csv_date_format)
Пример #15
0
    def get_economic_event_ret_over_custom_event_day(self,
                                                     data_frame_in,
                                                     event_dates,
                                                     name,
                                                     event,
                                                     start,
                                                     end,
                                                     lagged=False,
                                                     NYC_cutoff=10):
        """
        get_economic_event_ret_over_custom_event_day - Picks the rows of a daily series that line up with
        event dates, optionally relabelling the series by one weekday first

        Parameters
        ----------
        data_frame_in : pandas.DataFrame
            daily data; a deep copy is taken so the caller's index is not changed
        event_dates : pandas.DataFrame
            frame whose index holds the event timestamps (presumably UTC - confirm against caller)
        name : str
            first part of the suffix appended to the column names
        event : str
            second part of the suffix appended to the column names
        start, end
            bounds passed to filter_time_series_by_date for the event dates
        lagged : bool
            True shifts the data index back one weekday when the average event hour (New York) is at or
            after NYC_cutoff; False shifts it forward one weekday when the average hour is before the cutoff
        NYC_cutoff : int
            hour of day compared with the average New York event hour

        Returns
        -------
        pandas.DataFrame
            one row per event date (NaN where the data has no such date), columns suffixed with
            '-<name> <event>'
        """

        time_series_filter = TimeSeriesFilter()
        event_dates = time_series_filter.filter_time_series_by_date(
            start, end, event_dates)

        data_frame = data_frame_in.copy(
            deep=True)  # because we change the dates!

        time_series_tz = TimeSeriesTimezone()
        calendar = Calendar()

        bday = CustomBusinessDay(weekmask='Mon Tue Wed Thu Fri')

        event_dates_nyc = time_series_tz.convert_index_from_UTC_to_new_york_time(
            event_dates)
        average_hour_nyc = numpy.average(event_dates_nyc.index.hour)

        event_dates = calendar.floor_date(event_dates)

        # realised is traditionally on later day eg. 3rd Jan realised ON is 2nd-3rd Jan realised
        # so if Fed meeting is on 2nd Jan later, then we need realised labelled on 3rd (so minus a day)
        # implied expires on next day eg. 3rd Jan implied ON is 3rd-4th Jan implied

        # TODO smarter way of adjusting dates, as sometimes events can be before/after 10am NY cut
        if (lagged and average_hour_nyc >= NYC_cutoff):
            data_frame.index = data_frame.index - bday
        elif (not lagged and average_hour_nyc < NYC_cutoff):  # ie. implied
            data_frame.index = data_frame.index + bday

        # set as New York time and select only those ON vols at the 10am NY cut just before the event
        # (`.ix` no longer exists in pandas; reindex keeps its "NaN row for a missing label" result)
        data_frame_events = data_frame.reindex(event_dates.index)
        data_frame_events.columns = data_frame.columns.values + '-' + name + ' ' + event

        return data_frame_events
    def __init__(self):
        """Set up config, logging and time series helpers; load the economic events when the class has none."""
        # NOTE(review): super() is anchored at EventStudy, so the lookup starts *after* EventStudy
        # in the MRO - confirm that skipping EventStudy.__init__ is intended
        super(EventStudy, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()

        # events are stored on the class, so load them only while that slot is still None
        if LightEventsFactory._econ_data_frame is not None:
            return

        self.load_economic_events()
Пример #17
0
    def bus_day_of_month_seasonality(
            self,
            data_frame,
            month_list=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
            cum=True,
            cal="FX",
            partition_by_month=True,
            add_average=False,
            price_index=False):
        """
        bus_day_of_month_seasonality - Builds a seasonality table from
        TimeSeriesCalcs.average_by_month_day_by_bus_day for the chosen months

        Parameters
        ----------
        data_frame : pandas.DataFrame
            returns (or prices when price_index is set); when price_index is falsy its index is converted
            to datetimes in place
        month_list : list of int
            month labels selected from the averaged result (only read here, never modified)
        cum : bool
            when exactly True, the result is passed through create_mult_index (with a zero row at label 0
            when partitioned by month)
        cal : str
            calendar code handed to the holiday filter and to the averaging helper
        partition_by_month : bool
            when truthy, the first index level is unstacked into columns
        add_average : bool
            when truthy (and partitioned), adds an 'Avg' column holding the row-wise mean
        price_index : bool
            when truthy, data_frame is resampled to business days and converted to returns first

        Returns
        -------
        the (optionally unstacked / indexed) seasonality table
        """

        tsc = TimeSeriesCalcs()
        tsf = TimeSeriesFilter()

        if price_index:
            # resample into business days; the aggregation must now be explicit in pandas
            # (mean was the implicit default when resample() returned a frame directly)
            data_frame = data_frame.resample('B').mean()
            data_frame = tsc.calculate_returns(data_frame)

        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame = tsf.filter_time_series_by_holidays(data_frame, cal)

        monthly_seasonality = tsc.average_by_month_day_by_bus_day(
            data_frame, cal)
        monthly_seasonality = monthly_seasonality.loc[month_list]

        if partition_by_month:
            monthly_seasonality = monthly_seasonality.unstack(level=0)

            if add_average:
                monthly_seasonality['Avg'] = monthly_seasonality.mean(axis=1)

        if cum is True:
            if partition_by_month:
                monthly_seasonality.loc[0] = numpy.zeros(
                    len(monthly_seasonality.columns))
                # DataFrame.sort() was removed from pandas; sorting by index is sort_index()
                monthly_seasonality = monthly_seasonality.sort_index()

            monthly_seasonality = tsc.create_mult_index(monthly_seasonality)

        return monthly_seasonality
Пример #18
0
    def dump_indicators(self):
        """
        dump_indicators - Writes the indicator frames out as CSV files

        The complete indicator frame is always written. The economy based frame and the final frame are
        written only if their respective dump destination is not None.
        """
        tsf = TimeSeriesFilter()
        day_first = '%d/%m/%Y'

        self.logger.info("About to write all indicators to CSV")
        self.indicator.to_csv(self._csv_indicator_dump, date_format=day_first)

        write_econ = self._csv_econ_indicator_dump is not None

        if write_econ:
            self.logger.info("About to write economy based indicators to CSV")
            self.indicator_econ.to_csv(self._csv_econ_indicator_dump, date_format=day_first)

        # logged regardless of whether a final dump destination exists
        self.logger.info("About to write final indicators to CSV")

        write_final = self._csv_final_indicator_dump is not None

        if write_final:
            # remove weekends and remove start of series
            final_frame = tsf.filter_time_series_by_date(
                start_date="01 Jan 2000",
                finish_date=None,
                data_frame=tsf.filter_time_series_by_holidays(self.indicator_final, cal='WEEKDAY'))

            final_frame.to_csv(self._csv_final_indicator_dump, date_format=day_first)
Пример #19
0
    def get_intraday_moves_over_custom_event(self, data_frame_rets, ef_time_frame, vol=False,
                                             minute_start = 5, mins = 3 * 60, min_offset = 0 , create_index = False,
                                             resample = False, freq = 'minutes'):
        """
        get_intraday_moves_over_custom_event - Lines up returns around a set of event times and pivots them
        into one column per event, indexed by the offset from the start of each event window

        Parameters
        ----------
        data_frame_rets : pandas.DataFrame
            returns; only its first column ends up in the result. Unless resample is set, the helper
            columns 'Ind' and 'Rel' are written onto this very object.
        ef_time_frame : pandas.DataFrame
            frame whose index holds the event times; it is first filtered to the span of data_frame_rets
        vol : bool
            when exactly True (and create_index is falsy) a 5-row rolling std, annualised, is returned
        minute_start : int
            number of periods before each event at which the window opens
        mins : int
            number of periods after each event at which the window closes
        min_offset : int
            constant added to the offset labels
        create_index : bool
            when truthy the pivoted frame is passed through TimeSeriesCalcs.create_mult_index
        resample : bool
            when truthy the returns are resampled to 1 minute, gaps set to 0 and passed through
            remove_out_FX_out_of_hours
        freq : str
            'minutes' or 'days'; any other value leaves the window bounds undefined and the
            searchsorted call below fails with a NameError

        Returns
        -------
        pandas.DataFrame
            offsets down the rows, one column per event

        NOTE(review): `.ix`, `pandas.rolling_std`, `resample(...)` used as a frame and
        `fillna(method=...)` look like the pre-1.0 pandas API - confirm the pandas version this runs on.
        """

        tsf = TimeSeriesFilter()
        # keep only events that fall within the span covered by the returns
        ef_time_frame = tsf.filter_time_series_by_date(data_frame_rets.index[0], data_frame_rets.index[-1], ef_time_frame)
        ef_time = ef_time_frame.index

        # window bounds around each event plus the annualisation factor for that frequency
        if freq == 'minutes':
            ef_time_start = ef_time - timedelta(minutes = minute_start)
            ef_time_end = ef_time + timedelta(minutes = mins)
            ann_factor = 252 * 1440
        elif freq == 'days':
            ef_time = ef_time_frame.index.normalize()
            ef_time_start = ef_time - timedelta(days = minute_start)
            ef_time_end = ef_time + timedelta(days = mins)
            ann_factor = 252

        # offset labels for a full window: minute_start + mins consecutive integers
        ords = range(-minute_start + min_offset, mins + min_offset)

        # all data needs to be equally spaced
        if resample:
            # (re-creates the filter already built at the top of the method)
            tsf = TimeSeriesFilter()

            # make sure time series is properly sampled at 1 min intervals
            data_frame_rets = data_frame_rets.resample('1min')
            data_frame_rets = data_frame_rets.fillna(value = 0)
            data_frame_rets = tsf.remove_out_FX_out_of_hours(data_frame_rets)

        # 'Ind' will hold the offset label of each row; rows outside every window stay NaN
        data_frame_rets['Ind'] = numpy.nan

        # positions in the returns index where each window opens and closes
        start_index = data_frame_rets.index.searchsorted(ef_time_start)
        finish_index = data_frame_rets.index.searchsorted(ef_time_end)

        # not all observation windows will be same length (eg. last one?)

        # fill the indices which represent minutes
        # TODO vectorise this!
        for i in range(0, len(ef_time_frame.index)):
            try:
                data_frame_rets.ix[start_index[i]:finish_index[i], 'Ind'] = ords
            except:
                # fall back to as many labels as the window has rows
                # NOTE(review): the bare except also hides unrelated errors
                data_frame_rets.ix[start_index[i]:finish_index[i], 'Ind'] = ords[0:(finish_index[i] - start_index[i])]

        # set the release dates
        data_frame_rets.ix[start_index,'Rel'] = ef_time                                         # set entry points
        data_frame_rets.ix[finish_index + 1,'Rel'] = numpy.zeros(len(start_index))              # set exit points
        data_frame_rets['Rel'] = data_frame_rets['Rel'].fillna(method = 'pad')                  # fill down signals

        # keep only the rows that received an offset label
        data_frame_rets = data_frame_rets[pandas.notnull(data_frame_rets['Ind'])]               # get rid of other

        # offsets become the rows, events the columns; values come from the first column
        data_frame = data_frame_rets.pivot(index='Ind',
                                           columns='Rel', values=data_frame_rets.columns[0])

        data_frame.index.names = [None]

        if create_index:
            tsc = TimeSeriesCalcs()
            # blank out the first offset row before building the index
            data_frame.ix[-minute_start + min_offset,:] = numpy.nan
            data_frame = tsc.create_mult_index(data_frame)
        else:
            if vol is True:
                # annualise (if vol)
                data_frame = pandas.rolling_std(data_frame, window=5) * math.sqrt(ann_factor)
            else:
                data_frame = data_frame.cumsum()

        return data_frame
Пример #20
0
    def calculate_leverage_factor(self, returns_df, vol_target, vol_max_leverage, vol_periods = 60, vol_obs_in_year = 252,
                                  vol_rebalance_freq = 'BM', data_resample_freq = None, data_resample_type = 'mean',
                                  returns = True, period_shift = 0):
        """
        calculate_leverage_factor - Calculates the time series of leverage for a specified vol target

        Parameters
        ----------
        returns_df : DataFrame
            Asset returns (or prices when returns is False)

        vol_target : float
            vol target for assets

        vol_max_leverage : float
            maximum leverage allowed

        vol_periods : int
            number of periods to calculate volatility

        vol_obs_in_year : int
            number of observations in the year

        vol_rebalance_freq : str
            how often to rebalance

        data_resample_freq : str
            not implemented yet - any value other than None makes the method return None

        data_resample_type : str
            resample type handed to resample_time_series_frequency together with vol_rebalance_freq

        returns : boolean
            is this returns time series or prices?

        period_shift : int
            should we delay the signal by a number of periods?

        Returns
        -------
        pandas.Dataframe
            leverage aligned to the index of returns_df, NaN for the first vol_periods rows
        """

        tsc = TimeSeriesCalcs()
        tsf = TimeSeriesFilter()

        if data_resample_freq is not None:
            # TODO not implemented yet
            return None

        if not returns: returns_df = tsc.calculate_returns(returns_df)

        roll_vol_df = tsc.rolling_volatility(returns_df,
                                        periods = vol_periods, obs_in_year = vol_obs_in_year).shift(period_shift)

        # calculate the leverage as function of vol target (with max lev constraint)
        lev_df = vol_target / roll_vol_df
        lev_df[lev_df > vol_max_leverage] = vol_max_leverage

        lev_df = tsf.resample_time_series_frequency(lev_df, vol_rebalance_freq, data_resample_type)

        returns_df, lev_df = returns_df.align(lev_df, join='left', axis = 0)

        # ffill() replaces the deprecated fillna(method='ffill') spelling
        lev_df = lev_df.ffill()

        # ignore the first elements before the vol window kicks in; positional slice, which is what
        # the removed `.ix[0:n]` did on a non-integer (date) index
        lev_df.iloc[0:vol_periods] = numpy.nan

        return lev_df
Пример #21
0
            groupby([date_index.month, date_index.day]).mean()

    def group_by_year(self, data_frame):
        """Group the rows of data_frame by the year of its index and return the (unaggregated) groupby object."""
        years = data_frame.index.year

        return data_frame.groupby([years])

    def average_by_day_hour_min_by_bus_day(self, data_frame):
        """Average data_frame over (business day of month, hour, minute) of its index."""
        stamps = data_frame.index

        # business day of month comes from Calendar; hour and minute straight from the index
        bus_day_of_month = Calendar().get_bus_day_of_month(stamps)
        group_keys = [bus_day_of_month, stamps.hour, stamps.minute]

        return data_frame.groupby(group_keys).mean()

    def remove_NaN_rows(self, data_frame):
        """Return a copy of data_frame without the rows that contain any NaN (the input is left as is)."""
        without_nan = data_frame.dropna()

        return without_nan

if __name__ == '__main__':
    # ad-hoc manual checks, only run when the module is executed directly

    tsc = TimeSeriesCalcs()
    tsf = TimeSeriesFilter()

    # business day of month for the business days of Jan-Feb 2014
    business_days = pandas.bdate_range('2014-01-01', '2014-02-28')
    bus_day_numbers = tsc.get_bus_day_of_month(business_days)

    print(bus_day_numbers)

    # test rolling ewma on a simple 0..12 ramp
    ramp = pandas.DataFrame(numpy.arange(0.0,13.0))
    smoothed = tsc.rolling_ewma(ramp, span=3)

    print(smoothed)
Пример #22
0
class LightEventsFactory(EventStudy):
    """Event-study helper backed by a frame of economic event data read from disk.

    The frame is stored once on the class (``_econ_data_frame``) and shared by
    every instance; the methods look columns up in it with keys of the form
    ``<name>[-<event>].<field>``.
    """

    # class-level cache of the economic event data (filled on first construction)
    _econ_data_frame = None

    # where your HDF5 file is stored with economic data MUST CHANGE!!
    _hdf5_file_econ_file = "somefilnename.h5"

    # manual offset (in days) for certain events where Bloomberg displays the
    # wrong date (usually because of time differences), keyed by release-dt field
    _offset_events = {
        'AUD-Australia Labor Force Employment Change SA.release-dt': 1
    }

    def __init__(self):
        """Create the helpers used by this factory and load the event data once.

        The economic event frame is cached on the class, so it is only read
        from disk by the first instance that gets constructed.
        """
        # the first argument of super() must be *this* class; naming the parent
        # (EventStudy) started the lookup after it and skipped its __init__
        super(LightEventsFactory, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()

        if LightEventsFactory._econ_data_frame is None:
            self.load_economic_events()

    def load_economic_events(self):
        LightEventsFactory._econ_data_frame = self.time_series_io.read_time_series_cache_from_disk(
            self._hdf5_file_econ_file)

    def harvest_category(self, category_name):
        cat = self.config.get_categories_from_tickers_selective_filter(
            category_name)

        for k in cat:
            time_series_request = self.time_series_factory.populate_time_series_request(
                k)
            data_frame = self.time_series_factory.harvest_time_series(
                time_series_request)

            # TODO allow merge of multiple sources

        return data_frame

    def get_economic_events(self):
        return LightEventsFactory._econ_data_frame

    def dump_economic_events_csv(self, path):
        LightEventsFactory._econ_data_frame.to_csv(path)

    def get_economic_event_date_time(self, name, event=None, csv=None):
        ticker = self.create_event_desciptor_field(name, event,
                                                   "release-date-time-full")

        if csv is None:
            data_frame = LightEventsFactory._econ_data_frame[ticker]
            data_frame.index = LightEventsFactory._econ_data_frame[ticker]
        else:
            dateparse = lambda x: datetime.datetime.strptime(
                x, '%d/%m/%Y %H:%M')

            data_frame = pandas.read_csv(csv,
                                         index_col=0,
                                         parse_dates=True,
                                         date_parser=dateparse)

        data_frame = data_frame[pandas.notnull(data_frame.index)]

        start_date = datetime.datetime.strptime("01-Jan-1971", "%d-%b-%Y")
        self.time_series_filter.filter_time_series_by_date(
            start_date, None, data_frame)

        return data_frame

    def get_economic_event_date_time_dataframe(self,
                                               name,
                                               event=None,
                                               csv=None):
        series = self.get_economic_event_date_time(name, event, csv)

        data_frame = pandas.DataFrame(series.values, index=series.index)
        data_frame.columns.name = self.create_event_desciptor_field(
            name, event, "release-date-time-full")

        return data_frame

    def get_economic_event_date_time_fields(self, fields, name, event=None):
        ### acceptible fields
        # actual-release
        # survey-median
        # survey-average
        # survey-high
        # survey-low
        # survey-high
        # number-observations
        # release-dt
        # release-date-time-full
        # first-revision
        # first-revision-date

        ticker = []

        # construct tickers of the form USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release
        for i in range(0, len(fields)):
            ticker.append(
                self.create_event_desciptor_field(name, event, fields[i]))

        # index on the release-dt field eg. 20101230 (we shall convert this later)
        ticker_index = self.create_event_desciptor_field(
            name, event, "release-dt")

        ######## grab event date/times
        event_date_time = self.get_economic_event_date_time(name, event)
        date_time_fore = event_date_time.index

        # create dates for join later
        date_time_dt = [
            datetime.datetime(date_time_fore[x].year, date_time_fore[x].month,
                              date_time_fore[x].day)
            for x in range(len(date_time_fore))
        ]

        event_date_time_frame = pandas.DataFrame(event_date_time.index,
                                                 date_time_dt)
        event_date_time_frame.index = date_time_dt

        ######## grab event date/fields
        data_frame = LightEventsFactory._econ_data_frame[ticker]
        data_frame.index = LightEventsFactory._econ_data_frame[ticker_index]

        data_frame = data_frame[data_frame.index !=
                                0]  # eliminate any 0 dates (artifact of Excel)
        data_frame = data_frame[pandas.notnull(
            data_frame.index)]  # eliminate any NaN dates (artifact of Excel)
        ind_dt = data_frame.index

        # convert yyyymmdd format to datetime
        data_frame.index = [
            datetime.datetime(
                int((ind_dt[x] - (ind_dt[x] % 10000)) / 10000),
                int(((ind_dt[x] % 10000) - (ind_dt[x] % 100)) / 100),
                int(ind_dt[x] % 100)) for x in range(len(ind_dt))
        ]

        # HACK! certain events need an offset because BBG have invalid dates
        if ticker_index in self._offset_events:
            data_frame.index = data_frame.index + timedelta(
                days=self._offset_events[ticker_index])

        ######## join together event dates/date-time/fields in one data frame
        data_frame = event_date_time_frame.join(data_frame, how='inner')
        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame.index.name = ticker_index

        return data_frame

    def create_event_desciptor_field(self, name, event, field):
        """Build the column key ``<name>[-<event>].<field>`` for an event field."""
        prefix = name if event is None else name + "-" + event

        return prefix + "." + field

    def get_all_economic_events_date_time(self):
        event_names = self.get_all_economic_events()
        columns = ['event-name', 'release-date-time-full']

        data_frame = pandas.DataFrame(data=numpy.zeros((0, len(columns))),
                                      columns=columns)

        for event in event_names:
            event_times = self.get_economic_event_date_time(event)

            for time in event_times:
                data_frame.append(
                    {
                        'event-name': event,
                        'release-date-time-full': time
                    },
                    ignore_index=True)

        return data_frame

    def get_all_economic_events(self):
        field_names = LightEventsFactory._econ_data_frame.columns.values

        event_names = [x.split('.')[0] for x in field_names if '.Date' in x]

        event_names_filtered = [x for x in event_names if len(x) > 4]

        # sort list alphabetically (and remove any duplicates)
        return list(set(event_names_filtered))

    def get_economic_event_date(self, name, event=None):
        return LightEventsFactory._econ_data_frame[
            self.create_event_desciptor_field(name, event, ".release-dt")]

    def get_economic_event_ret_over_custom_event_day(self,
                                                     data_frame_in,
                                                     name,
                                                     event,
                                                     start,
                                                     end,
                                                     lagged=False,
                                                     NYC_cutoff=10):
        """Look up the release times of an event and delegate to the parent class.

        The parent implementation receives the same arguments plus the event
        date/times found for ``name``/``event``, inserted as its second
        positional argument.
        """
        # get the times of events
        event_dates = self.get_economic_event_date_time(name, event)

        parent = super(LightEventsFactory, self)

        return parent.get_economic_event_ret_over_custom_event_day(
            data_frame_in,
            event_dates,
            name,
            event,
            start,
            end,
            lagged=lagged,
            NYC_cutoff=NYC_cutoff)

    def get_economic_event_vol_over_event_day(self,
                                              vol_in,
                                              name,
                                              event,
                                              start,
                                              end,
                                              realised=False):

        return self.get_economic_event_ret_over_custom_event_day(
            vol_in, name, event, start, end, lagged=realised)

        # return super(EventsFactory, self).get_economic_event_ret_over_event_day(vol_in, name, event, start, end, lagged = realised)

    def get_daily_moves_over_event(self):
        """Placeholder for daily moves over an event; not implemented, returns None."""
        # TODO
        return None

    # return only US events etc. by dates
    def get_intraday_moves_over_event(self,
                                      data_frame_rets,
                                      cross,
                                      event_fx,
                                      event_name,
                                      start,
                                      end,
                                      vol,
                                      mins=3 * 60,
                                      min_offset=0,
                                      create_index=False,
                                      resample=False,
                                      freq='minutes'):
        """Compute intraday moves around the releases of one economic event.

        The release times of ``event_fx``/``event_name`` are restricted to
        [start, end] and handed, together with the remaining arguments, to
        ``get_intraday_moves_over_custom_event``. ``cross`` is accepted but
        not used here.
        """
        date_filter = TimeSeriesFilter()

        release_times = self.get_economic_event_date_time_dataframe(
            event_fx, event_name)
        release_times = date_filter.filter_time_series_by_date(
            start, end, release_times)

        window_options = dict(mins=mins,
                              min_offset=min_offset,
                              create_index=create_index,
                              resample=resample,
                              freq=freq)

        return self.get_intraday_moves_over_custom_event(
            data_frame_rets, release_times, vol, **window_options)

    def get_surprise_against_intraday_moves_over_event(
            self,
            data_frame_cross_orig,
            cross,
            event_fx,
            event_name,
            start,
            end,
            offset_list=[1, 5, 30, 60],
            add_surprise=False,
            surprise_field='survey-average'):
        """Compare the surprise of an event with the intraday moves around it.

        Fetches the actual release and survey fields of the event, restricts
        them to [start, end] and delegates to
        ``get_surprise_against_intraday_moves_over_custom_event``.

        NOTE: the default ``offset_list`` is a single list shared by all
        calls; it is only passed through here, never modified.
        """
        date_filter = TimeSeriesFilter()
        fields = [
            'actual-release', 'survey-median', 'survey-average', 'survey-high',
            'survey-low'
        ]

        event_fields = self.get_economic_event_date_time_fields(
            fields, event_fx, event_name)
        event_fields = date_filter.filter_time_series_by_date(
            start, end, event_fields)

        surprise_options = dict(offset_list=offset_list,
                                add_surprise=add_surprise,
                                surprise_field=surprise_field)

        return self.get_surprise_against_intraday_moves_over_custom_event(
            data_frame_cross_orig, event_fields, cross, event_fx, event_name,
            start, end, **surprise_options)
Пример #23
0
    def get_intraday_moves_over_custom_event(self,
                                             data_frame_rets,
                                             ef_time_frame,
                                             vol=False,
                                             minute_start=5,
                                             mins=3 * 60,
                                             min_offset=0,
                                             create_index=False,
                                             resample=False,
                                             freq='minutes'):
        """Line returns up around each event time and pivot to one column per event.

        Parameters
        ----------
        data_frame_rets : pandas.DataFrame
            returns; only its first column ends up in the result. Unless
            ``resample`` is set, the helper columns 'Ind' and 'Rel' are added
            to the frame that was passed in.
        ef_time_frame : pandas.DataFrame
            frame whose index holds the event times; it is first restricted
            to the span covered by ``data_frame_rets``
        vol : bool
            if True (and ``create_index`` is False) return a rolling standard
            deviation (window 5) scaled by sqrt(ann_factor) instead of the
            cumulative sum
        minute_start : int
            number of periods before each event at which its window starts
        mins : int
            number of periods after each event at which its window ends
        min_offset : int
            shift applied to the relative labels of the window
        create_index : bool
            if True, blank the first row and pass the result through
            ``TimeSeriesCalcs.create_mult_index``
        resample : bool
            if True, resample to 1 minute, fill gaps with 0 and drop
            out-of-hours FX data before aligning
        freq : str
            'minutes' or 'days'; any other value leaves the window bounds
            undefined and fails with a NameError further down

        Returns
        -------
        pandas.DataFrame
            indexed by the relative label ('Ind'), one column per event ('Rel')
        """

        tsf = TimeSeriesFilter()
        ef_time_frame = tsf.filter_time_series_by_date(
            data_frame_rets.index[0], data_frame_rets.index[-1], ef_time_frame)
        ef_time = ef_time_frame.index

        if freq == 'minutes':
            ef_time_start = ef_time - timedelta(minutes=minute_start)
            ef_time_end = ef_time + timedelta(minutes=mins)
            ann_factor = 252 * 1440     # presumably 252 trading days x 1440 minutes per day
        elif freq == 'days':
            ef_time = ef_time_frame.index.normalize()
            ef_time_start = ef_time - timedelta(days=minute_start)
            ef_time_end = ef_time + timedelta(days=mins)
            ann_factor = 252

        # relative labels of one complete window
        ords = range(-minute_start + min_offset, mins + min_offset)

        # all data needs to be equally spaced
        if resample:
            tsf = TimeSeriesFilter()

            # make sure time series is properly sampled at 1 min intervals
            data_frame_rets = data_frame_rets.resample('1min')
            data_frame_rets = data_frame_rets.fillna(value=0)
            data_frame_rets = tsf.remove_out_FX_out_of_hours(data_frame_rets)

        data_frame_rets['Ind'] = numpy.nan

        # positions in the returns index where each event window starts/ends
        start_index = data_frame_rets.index.searchsorted(ef_time_start)
        finish_index = data_frame_rets.index.searchsorted(ef_time_end)

        # not all observation windows will be same length (eg. last one?)

        # fill the indices which represent minutes
        # TODO vectorise this!
        for i in range(0, len(ef_time_frame.index)):
            try:
                data_frame_rets.ix[start_index[i]:finish_index[i],
                                   'Ind'] = ords
            except:
                # fallback when the first assignment fails: use only as many
                # labels as there are rows between start and finish
                data_frame_rets.ix[start_index[i]:finish_index[i],
                                   'Ind'] = ords[0:(finish_index[i] -
                                                    start_index[i])]

        # set the release dates
        data_frame_rets.ix[start_index, 'Rel'] = ef_time  # set entry points
        data_frame_rets.ix[finish_index + 1, 'Rel'] = numpy.zeros(
            len(start_index))  # set exit points
        data_frame_rets['Rel'] = data_frame_rets['Rel'].fillna(
            method='pad')  # fill down signals

        data_frame_rets = data_frame_rets[pandas.notnull(
            data_frame_rets['Ind'])]  # keep only rows inside an event window

        # one row per relative label, one column per event, values from the first column
        data_frame = data_frame_rets.pivot(index='Ind',
                                           columns='Rel',
                                           values=data_frame_rets.columns[0])

        data_frame.index.names = [None]

        if create_index:
            tsc = TimeSeriesCalcs()
            # blank the first row of the window before building the index
            data_frame.ix[-minute_start + min_offset, :] = numpy.nan
            data_frame = tsc.create_mult_index(data_frame)
        else:
            if vol is True:
                # annualise (if vol)
                data_frame = pandas.rolling_std(
                    data_frame, window=5) * math.sqrt(ann_factor)
            else:
                data_frame = data_frame.cumsum()

        return data_frame
class LightEventsFactory(EventStudy):
    """Event-study helper backed by a frame of economic event data read from disk.

    The frame is stored once on the class (``_econ_data_frame``) and shared by
    every instance; the methods look columns up in it with keys of the form
    ``<name>[-<event>].<field>``.
    """

    # class-level cache of the economic event data (filled on first construction)
    _econ_data_frame = None

    # where your HDF5 file is stored with economic data MUST CHANGE!!
    _hdf5_file_econ_file = "somefilnename.h5"

    # manual offset (in days) for certain events where Bloomberg displays the
    # wrong date (usually because of time differences), keyed by release-dt field
    _offset_events = {'AUD-Australia Labor Force Employment Change SA.release-dt' : 1}

    def __init__(self):
        """Create the helpers used by this factory and load the event data once."""
        # the first argument of super() must be *this* class; naming the parent
        # (EventStudy) started the lookup after it and skipped its __init__
        super(LightEventsFactory, self).__init__()

        self.config = ConfigManager()
        self.logger = LoggerManager().getLogger(__name__)
        self.time_series_filter = TimeSeriesFilter()
        self.time_series_io = TimeSeriesIO()

        if LightEventsFactory._econ_data_frame is None:
            self.load_economic_events()

    def load_economic_events(self):
        """Read the economic event file from disk into the class-level cache."""
        LightEventsFactory._econ_data_frame = self.time_series_io.read_time_series_cache_from_disk(
            self._hdf5_file_econ_file)

    def harvest_category(self, category_name):
        """Download the time series of every entry the config returns for a category.

        Returns the result of the last download, or None when the filter
        matched nothing (this case used to raise UnboundLocalError).
        """
        # NOTE(review): ``self.time_series_factory`` is not assigned in
        # __init__ - presumably provided elsewhere; confirm.
        data_frame = None

        cat = self.config.get_categories_from_tickers_selective_filter(category_name)

        for k in cat:
            time_series_request = self.time_series_factory.populate_time_series_request(k)

            # TODO allow merge of multiple sources (each pass replaces the last result)
            data_frame = self.time_series_factory.harvest_time_series(time_series_request)

        return data_frame

    def get_economic_events(self):
        """Return the shared economic event frame (None if nothing has been loaded)."""
        return LightEventsFactory._econ_data_frame

    def dump_economic_events_csv(self, path):
        """Write the shared economic event frame to ``path`` as CSV."""
        LightEventsFactory._econ_data_frame.to_csv(path)

    def get_economic_event_date_time(self, name, event=None, csv=None):
        """Return the full release date/times of an event, indexed by themselves.

        Parameters
        ----------
        name : str
            first part of the event key
        event : str, optional
            second part of the event key, joined to ``name`` with "-"
        csv : str, optional
            if given, the release times are read from this CSV file (first
            column, formatted dd/mm/YYYY HH:MM) instead of the cached frame

        Returns
        -------
        pandas.Series or pandas.DataFrame
            release times with null timestamps removed, passed through the
            date filter with a start date of 1 Jan 1971
        """
        ticker = self.create_event_desciptor_field(name, event, "release-date-time-full")

        if csv is None:
            release_times = LightEventsFactory._econ_data_frame[ticker]

            # work on a copy so re-indexing cannot touch the column held in
            # the shared class-level frame
            data_frame = release_times.copy()
            data_frame.index = release_times
        else:
            dateparse = lambda x: datetime.datetime.strptime(x, '%d/%m/%Y %H:%M')

            data_frame = pandas.read_csv(csv, index_col=0, parse_dates=True, date_parser=dateparse)

        data_frame = data_frame[pandas.notnull(data_frame.index)]

        # keep the filtered result: the filter hands back the filtered object
        # (as its other callers in this class rely on), and the return value
        # used to be dropped, which made this date filter a no-op
        start_date = datetime.datetime(1971, 1, 1)
        data_frame = self.time_series_filter.filter_time_series_by_date(start_date, None, data_frame)

        return data_frame

    def get_economic_event_date_time_dataframe(self, name, event=None, csv=None):
        """Return the release date/times of an event as a one-column DataFrame.

        The frame's ``columns.name`` is the release-date-time-full key of the event.
        """
        series = self.get_economic_event_date_time(name, event, csv)

        data_frame = pandas.DataFrame(series.values, index=series.index)
        data_frame.columns.name = self.create_event_desciptor_field(name, event, "release-date-time-full")

        return data_frame

    def get_economic_event_date_time_fields(self, fields, name, event=None):
        """Return the requested fields of an event joined with its release date/times.

        Acceptable fields: actual-release, survey-median, survey-average,
        survey-high, survey-low, number-observations, release-dt,
        release-date-time-full, first-revision, first-revision-date.

        Returns a DataFrame (inner join on the release day) whose index is
        named after the release-dt key of the event.
        """
        # tickers of the form
        # USD-US Employees on Nonfarm Payrolls Total MoM Net Change SA.actual-release
        ticker = [self.create_event_desciptor_field(name, event, field) for field in fields]

        # index on the release-dt field eg. 20101230 (converted further down)
        ticker_index = self.create_event_desciptor_field(name, event, "release-dt")

        ######## grab event date/times
        event_date_time = self.get_economic_event_date_time(name, event)
        date_time_fore = event_date_time.index

        # release times truncated to midnight - these are the join keys
        date_time_dt = [datetime.datetime(stamp.year, stamp.month, stamp.day)
                        for stamp in date_time_fore]

        event_date_time_frame = pandas.DataFrame(event_date_time.index, date_time_dt)
        event_date_time_frame.index = date_time_dt

        ######## grab event date/fields
        data_frame = LightEventsFactory._econ_data_frame[ticker]
        data_frame.index = LightEventsFactory._econ_data_frame[ticker_index]

        data_frame = data_frame[data_frame.index != 0]              # eliminate any 0 dates (artifact of Excel)
        data_frame = data_frame[pandas.notnull(data_frame.index)]   # eliminate any NaN dates (artifact of Excel)

        # convert yyyymmdd format to datetime
        data_frame.index = [datetime.datetime(
                               int((value - (value % 10000)) / 10000),
                               int(((value % 10000) - (value % 100)) / 100),
                               int(value % 100)) for value in data_frame.index]

        # HACK! certain events need an offset because BBG have invalid dates
        if ticker_index in self._offset_events:
            data_frame.index = data_frame.index + timedelta(days=self._offset_events[ticker_index])

        ######## join together event dates/date-time/fields in one data frame
        data_frame = event_date_time_frame.join(data_frame, how='inner')
        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame.index.name = ticker_index

        return data_frame

    def create_event_desciptor_field(self, name, event, field):
        """Build the column key ``<name>[-<event>].<field>`` for an event field."""
        if event is None:
            return name + "." + field

        return name + "-" + event + "." + field

    def get_all_economic_events_date_time(self):
        """Return every release date/time of every known event as one long table.

        The result has one row per (event, release time) pair, with the
        columns 'event-name' and 'release-date-time-full'.
        """
        columns = ['event-name', 'release-date-time-full']

        # DataFrame.append returns a new frame instead of growing the one it
        # is called on, so the old loop always returned an empty frame; gather
        # the rows first and build the frame once
        rows = []

        for event in self.get_all_economic_events():
            event_times = self.get_economic_event_date_time(event)

            rows.extend({'event-name': event, 'release-date-time-full': time}
                        for time in event_times)

        return pandas.DataFrame(rows, columns=columns)

    def get_all_economic_events(self):
        """Return the names of all events in the cached frame, sorted and de-duplicated.

        An event name is the part before the first "." of every column whose
        name contains '.Date'; names of four characters or fewer are dropped.
        """
        # NOTE(review): other methods of this class build keys ending in fields
        # such as 'release-dt'; confirm the cached frame really has '.Date' columns
        field_names = LightEventsFactory._econ_data_frame.columns.values

        event_names = {x.split('.')[0] for x in field_names if '.Date' in x}

        # a bare list(set(...)) removed duplicates but left the order arbitrary
        return sorted(x for x in event_names if len(x) > 4)

    def get_economic_event_date(self, name, event=None):
        """Return the release-dt column of an event from the cached frame."""
        # the descriptor helper inserts the "." itself; passing ".release-dt"
        # produced a key with a double dot ("<name>..release-dt")
        key = self.create_event_desciptor_field(name, event, "release-dt")

        return LightEventsFactory._econ_data_frame[key]

    def get_economic_event_ret_over_custom_event_day(self, data_frame_in, name, event, start, end, lagged=False,
                                                     NYC_cutoff=10):
        """Look up the release times of an event and delegate to the parent class.

        The parent implementation receives the same arguments plus the event
        date/times, inserted as its second positional argument.
        """
        # get the times of events
        event_dates = self.get_economic_event_date_time(name, event)

        return super(LightEventsFactory, self).get_economic_event_ret_over_custom_event_day(
            data_frame_in, event_dates, name, event, start, end,
            lagged=lagged, NYC_cutoff=NYC_cutoff)

    def get_economic_event_vol_over_event_day(self, vol_in, name, event, start, end, realised=False):
        """Run the custom-event-day calculation on a volatility series.

        ``realised`` is forwarded as the ``lagged`` flag.
        """
        return self.get_economic_event_ret_over_custom_event_day(vol_in, name, event, start, end,
                                                                 lagged=realised)

    def get_daily_moves_over_event(self):
        """Placeholder for daily moves over an event; not implemented, returns None."""
        # TODO
        pass

    def get_intraday_moves_over_event(self, data_frame_rets, cross, event_fx, event_name, start, end, vol, mins=3 * 60,
                                      min_offset=0, create_index=False, resample=False, freq='minutes'):
        """Compute intraday moves around the releases of one economic event.

        The release times of ``event_fx``/``event_name`` are restricted to
        [start, end] and handed to ``get_intraday_moves_over_custom_event``.
        ``cross`` is accepted but not used here.
        """
        tsf = TimeSeriesFilter()

        ef_time_frame = self.get_economic_event_date_time_dataframe(event_fx, event_name)
        ef_time_frame = tsf.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_intraday_moves_over_custom_event(data_frame_rets, ef_time_frame,
                                                         vol, mins=mins, min_offset=min_offset,
                                                         create_index=create_index, resample=resample, freq=freq)

    def get_surprise_against_intraday_moves_over_event(self, data_frame_cross_orig, cross, event_fx, event_name, start, end,
                                                       offset_list=[1, 5, 30, 60], add_surprise=False,
                                                       surprise_field='survey-average'):
        """Compare the surprise of an event with the intraday moves around it.

        Fetches the actual release and survey fields of the event, restricts
        them to [start, end] and delegates to
        ``get_surprise_against_intraday_moves_over_custom_event``.

        NOTE: the default ``offset_list`` is a single list shared by all
        calls; it is only passed through here, never modified.
        """
        tsf = TimeSeriesFilter()
        fields = ['actual-release', 'survey-median', 'survey-average', 'survey-high', 'survey-low']

        ef_time_frame = self.get_economic_event_date_time_fields(fields, event_fx, event_name)
        ef_time_frame = tsf.filter_time_series_by_date(start, end, ef_time_frame)

        return self.get_surprise_against_intraday_moves_over_custom_event(
            data_frame_cross_orig, ef_time_frame, cross, event_fx, event_name, start, end,
            offset_list=offset_list, add_surprise=add_surprise,
            surprise_field=surprise_field)
Пример #25
0
    def get_bus_day_of_month(self, date, cal = 'FX'):
        """Return the business day of the month for each date.

        For example 3rd Jan, falling on a Monday, would be the 1st business
        day of the month.

        Parameters
        ----------
        date : pandas.DatetimeIndex
            dates to look up; assumed to be sorted ascending, because the
            first entry fixes the start of the business calendar and the last
            entry its minimum end
        cal : str
            calendar name handed to the holiday lookup

        Returns
        -------
        numpy.ndarray
            1-based business-day-of-month ordinal (as floats) for each date

        Notes
        -----
        A date that is not itself a business day is looked up with
        ``searchsorted`` and therefore gets the ordinal of the next business
        day in the calendar.
        """
        tsf = TimeSeriesFilter()

        try:
            date = date.normalize() # strip times off the dates - for business dates just want dates!
        except Exception:
            # best effort: inputs that cannot be normalised are used as they are
            pass

        start = pandas.to_datetime(datetime.datetime(date.year[0], date.month[0], 1))

        # the calendar has to reach the last requested date as well as today;
        # with an end of "today" only, any later date pointed past the end of
        # the calendar and raised an IndexError
        last_requested = datetime.datetime(date.year[-1], date.month[-1], date.day[-1])
        end = max(datetime.datetime.today(), last_requested)

        holidays = tsf.get_holidays(start, end, cal)

        bday = CustomBusinessDay(holidays=holidays, weekmask='Mon Tue Wed Thu Fri')

        bus_dates = pandas.date_range(start, end, freq=bday)

        month = bus_dates.month

        # running count of business days, restarting at 1 whenever the month changes
        work_day_index = numpy.zeros(len(bus_dates))
        work_day_index[0] = 1

        for i in range(1, len(bus_dates)):
            if month[i] == month[i-1]:
                work_day_index[i] = work_day_index[i-1] + 1
            else:
                work_day_index[i] = 1

        bus_day_of_month = work_day_index[bus_dates.searchsorted(date)]

        return bus_day_of_month
    def harvest_time_series(self, time_series_request, kill_session = True):
        """
        harvest_time_series - Loads time series from specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing time series to fetched, including ticker, start & finish date etc.
        kill_session : bool
            if True, the loader is asked to close its session after an internet load

        Returns
        -------
        pandas.DataFrame
            the downloaded (and, except for event categories, filtered) data;
            None when the cache algo does not ask for a return value or when
            filtering failed (the traceback is logged)
        """

        tickers = time_series_request.tickers
        loader = self.get_loader(time_series_request.data_source)

        # check if tickers have been specified (if not load all of them for a category)
        # also handle single tickers/list tickers: None, '' and [] all mean "not specified"
        create_tickers = tickers is None or (
            isinstance(tickers, (str, list)) and len(tickers) == 0)

        if create_tickers:
            # NOTE(review): the loader lookup above reads ``data_source`` but this
            # call reads ``source`` - confirm the request really has both attributes
            time_series_request.tickers = self.config.get_tickers_list_for_category(
                time_series_request.category, time_series_request.source,
                time_series_request.freq, time_series_request.cut)

        if time_series_request.freq in ['intraday', 'tick']:
            # intraday or tick: only one ticker per cache file
            data_frame_agg = self.download_intraday_tick(time_series_request, loader)
        else:
            # daily: multiple tickers per cache file - assume we make one API call to vendor library
            data_frame_agg = self.download_daily(time_series_request, loader)

        if 'internet_load' in time_series_request.cache_algo:
            self.logger.debug("Internet loading.. ")

            # signal to loader template to exit session
            if loader is not None and kill_session == True:
                loader.kill_session()

        if time_series_request.cache_algo == 'cache_algo':
            self.logger.debug("Only caching data in memory, do not return any time series.")
            return

        tsf = TimeSeriesFilter()

        # only return time series if specified in the algo
        if 'return' in time_series_request.cache_algo:
            # special case for events/events-dt which is not indexed like other tables
            if hasattr(time_series_request, 'category'):
                if 'events' in time_series_request.category:
                    return data_frame_agg

            try:
                return tsf.filter_time_series(time_series_request, data_frame_agg)
            except Exception:
                # log and carry on, but let KeyboardInterrupt/SystemExit through
                # (a bare except swallowed those as well)
                import traceback

                self.logger.error(traceback.format_exc())

                return None
Пример #27
0
        # fetch NFP times from Bloomberg
        # Builds an "events" category request for the NFP release date/time list.
        # start_date and finish_date are defined before this fragment (not visible here).
        time_series_request = TimeSeriesRequest(
                start_date = start_date,                # start date
                finish_date = finish_date,              # finish date
                category = "events",
                freq = 'daily',                         # daily data
                data_source = 'bloomberg',              # use Bloomberg as data source
                tickers = ['NFP'],
                fields = ['release-date-time-full'],                    # which fields to download
                vendor_tickers = ['NFP TCH Index'], # ticker (Bloomberg)
                vendor_fields = ['ECO_FUTURE_RELEASE_DATE_LIST'],   # which Bloomberg fields to download
                cache_algo = 'internet_load_return')                # how to return data

        ltsf = LightTimeSeriesFactory()
        ts_filter = TimeSeriesFilter()

        # request the event table described above; the result is read below by its
        # 'NFP.release-date-time-full' column
        df_event_times = ltsf.harvest_time_series(time_series_request)

        utc_time = pytz.utc
        # keep only the release timestamps: they become the index of a new DataFrame with no columns
        df_event_times = pandas.DataFrame(index = df_event_times['NFP.release-date-time-full'])
        # NOTE(review): tz_localize presumably needs a tz-naive datetime index here - confirm
        df_event_times.index = df_event_times.index.tz_localize(utc_time)    # work in UTC time
        # presumably restricts the events to the start_date..finish_date range - verify against TimeSeriesFilter
        df_event_times = ts_filter.filter_time_series_by_date(start_date, finish_date, df_event_times)

        # get last NFP time
        # start_date/finish_date are rebound to a 4 minute window that begins 1 minute
        # before the final index entry (assumes the index is in chronological order - TODO confirm)
        # NOTE(review): index[-1] raises IndexError if no event times remain after filtering
        start_date = df_event_times.index[-1] - timedelta(minutes=1)
        finish_date = start_date + timedelta(minutes=4)

        # output labels and the matching Bloomberg tickers, paired by position
        # NOTE(review): label 'JPYUSD' is paired with vendor ticker 'USDJPY BGN Curncy' -
        # presumably the series is inverted later in code not shown here; confirm
        tickers = ['EURUSD', 'JPYUSD', 'GBPUSD']
        vendor_tickers = ['EURUSD BGN Curncy', 'USDJPY BGN Curncy', 'GBPUSD BGN Curncy']
Пример #28
0
        # fetch NFP times from Bloomberg
        # Builds an "events" category request for the NFP release date/time list.
        # start_date and finish_date are defined before this fragment (not visible here).
        time_series_request = TimeSeriesRequest(
                start_date = start_date,                # start date
                finish_date = finish_date,              # finish date
                category = "events",
                freq = 'daily',                         # daily data
                data_source = 'bloomberg',              # use Bloomberg as data source
                tickers = ['NFP'],
                fields = ['release-date-time-full'],                    # which fields to download
                vendor_tickers = ['NFP TCH Index'], # ticker (Bloomberg)
                vendor_fields = ['ECO_FUTURE_RELEASE_DATE_LIST'],   # which Bloomberg fields to download
                cache_algo = 'internet_load_return')                # how to return data

        ltsf = LightTimeSeriesFactory()
        ts_filter = TimeSeriesFilter()

        # request the event table described above; the result is read below by its
        # 'NFP.release-date-time-full' column
        df_event_times = ltsf.harvest_time_series(time_series_request)

        utc_time = pytz.utc
        # keep only the release timestamps: they become the index of a new DataFrame with no columns
        df_event_times = pandas.DataFrame(index = df_event_times['NFP.release-date-time-full'])
        # NOTE(review): tz_localize presumably needs a tz-naive datetime index here - confirm
        df_event_times.index = df_event_times.index.tz_localize(utc_time)    # work in UTC time
        # presumably restricts the events to the start_date..finish_date range - verify against TimeSeriesFilter
        df_event_times = ts_filter.filter_time_series_by_date(start_date, finish_date, df_event_times)

        # get last NFP time
        # start_date/finish_date are rebound to a 4 minute window that begins 1 minute
        # before the final index entry (assumes the index is in chronological order - TODO confirm)
        # NOTE(review): index[-1] raises IndexError if no event times remain after filtering
        start_date = df_event_times.index[-1] - timedelta(minutes=1)
        finish_date = start_date + timedelta(minutes=4)

        # output labels and the matching Bloomberg tickers, paired by position
        # NOTE(review): label 'JPYUSD' is paired with vendor ticker 'USDJPY BGN Curncy' -
        # presumably the series is inverted later in code not shown here; confirm
        tickers = ['EURUSD', 'JPYUSD', 'GBPUSD']
        vendor_tickers = ['EURUSD BGN Curncy', 'USDJPY BGN Curncy', 'GBPUSD BGN Curncy']