def get_futures_daily_close_data(tickers: list,
                                 start_date: datetime = None,
                                 end_date: datetime = None) -> pd.DataFrame:
    """
    Returns a DataFrame with the daily close of futures between start and end dates. N/A are rolled forward, after
    which every row that comes after the last observation date of the resp. futures contract is set back to N/A.
    :param tickers: list of strings
    :param start_date: datetime
    :param end_date: datetime
    :return: DataFrame
    """
    # get the raw data from the financial database
    fin_db = FinancialDatabase(__MY_DATABASE_NAME__)
    raw_futures_data = fin_db.get_close_price_df(tickers, start_date, end_date)

    # clean the data by rolling N/A forward (DataFrame.ffill replaces the deprecated fillna(method='ffill'))
    cleaned_futures_data = raw_futures_data.ffill()

    # get last observation date per ticker
    ticker_last_obs_date_dict = fin_db.get_ticker_underlying_attribute_dict(
        tickers, Underlying.latest_observation_date_with_values)

    # for each column, set each row to N/A if it is after the last observation date for the resp. ticker
    for col_i, ticker in enumerate(cleaned_futures_data.columns):
        last_obs_date = ticker_last_obs_date_dict[ticker]
        try:
            last_obs_date_index = cleaned_futures_data.index.get_loc(
                last_obs_date)
        except KeyError:
            # the last observation date is not in the index (e.g. it is after end_date): there are no rows to blank
            continue
        cleaned_futures_data.iloc[last_obs_date_index + 1:, col_i] = np.nan
    return cleaned_futures_data
    def get_price(self):
        """
        Load either close prices or total return prices for the selected tickers, pass the result through
        handle_nan_df and store it in the parent's result_df_dict. The key is 'close_price' or 'total_return_price',
        followed by '_<CURRENCY>' when a currency has been chosen. Finishes by logging and calling cancel().
        :return: None
        """
        database = FinancialDatabase(__MY_DATABASE_NAME__)

        # key suffix is empty unless a currency has been selected
        currency_suffix = ''
        if self.get_currency() is not None:
            currency_suffix = '_' + self.get_currency().upper()

        # the combo box value 'No' means plain close prices, anything else means total return
        wants_close_price = self.total_return_combo.get() == 'No'

        # arguments shared by both database requests
        request = {
            'tickers': self.ticker_list,
            'start_date': self.get_start_date(),
            'end_date': self.get_end_date(),
            'currency': self.get_currency(),
        }
        if wants_close_price:
            key_prefix = 'close_price'
            loaded_df = database.get_close_price_df(**request)
        else:
            key_prefix = 'total_return_price'
            loaded_df = database.get_total_return_df(
                withholding_tax=self.get_div_tax(), **request)

        loaded_df = self.handle_nan_df(loaded_df)
        self.parent.result_df_dict.update({key_prefix + currency_suffix: loaded_df})
        logger.info('Done with loading price!')
        self.cancel()
Exemplo n.º 3
0
 def basket_prices(self,
                   start_date: {date, datetime} = None,
                   end_date: {date, datetime} = None,
                   forward_fill_na: bool = True):
     """
     Load the prices of all eligible tickers in the investment universe. Total return prices are loaded when
     self.total_return is truthy, otherwise close prices.
     :param start_date: date or datetime passed on to the database request
     :param end_date: date or datetime passed on to the database request
     :param forward_fill_na: bool, if True N/A are rolled forward in the result
     :return: the price data returned by the database handler
     """
     logger.debug('Get basket price.')
     financial_database_handler = FinancialDatabase(__MY_DATABASE_NAME__,
                                                    False)
     tickers = self.investment_universe.get_eligible_tickers()
     if self.total_return:
         # NOTE(review): arguments are positional; confirm that dividend tax comes before currency in the signature
         price = financial_database_handler.get_total_return_df(
             tickers, start_date, end_date, self.dividend_tax,
             self.currency)
     else:
         price = financial_database_handler.get_close_price_df(
             tickers, start_date, end_date, self.currency)
     if forward_fill_na:
         # ffill replaces the deprecated fillna(method='ffill'); still done in place as before
         price.ffill(inplace=True)
     return price
Exemplo n.º 4
0
class InvestmentUniverse:
    """
    Class definition for InvestmentUniverse.

    Holds a list of tickers and an observation calendar together with an eligibility DataFrame (index: observation
    calendar, columns: tickers). Each applied filter is merged onto the observation calendar and multiplied into the
    eligibility DataFrame, so a ticker stays eligible on a date only if every applied filter marks it with 1.
    """
    def __init__(self,
                 tickers: {str, list, tuple},
                 start=None,
                 end=None,
                 periods=None,
                 freq=None,
                 observation_calendar: pd.DatetimeIndex = None):
        """
        The observation calendar is either generated with pandas.date_range (when at least one of start, end,
        periods and freq is given) or taken from observation_calendar.
        :param tickers: str, list, tuple
        :param start: passed on to pandas.date_range
        :param end: passed on to pandas.date_range
        :param periods: passed on to pandas.date_range
        :param freq: passed on to pandas.date_range
        :param observation_calendar: DatetimeIndex, only used when start, end, periods and freq are all None
        :raises ValueError: if no calendar can be set up or observation_calendar is not monotonic increasing
        """
        if any(arg is not None for arg in (start, end, periods, freq)):
            self._observation_calendar = pd.date_range(start=start,
                                                       end=end,
                                                       periods=periods,
                                                       freq=freq)
        elif observation_calendar is None:
            raise ValueError(
                'Need to specify observation_calendar or the parameters of an observation calendar i.e. '
                'start, end, periods, freq')
        elif observation_calendar.is_monotonic_increasing:
            self._observation_calendar = observation_calendar
        else:
            raise ValueError(
                'observation_calendar needs to be an instance of a DatatimeIndex object that is '
                'monotonic increasing')

        # the tickers setter also initializes the eligibility DataFrame and the filter state
        self.tickers = tickers
        self._financial_database_handler = FinancialDatabase(
            __MY_DATABASE_NAME__)
        self._filter_has_been_applied = False
        self._filter_desc_list = []

    def get_start_end_dates(self):
        """
        :return: tuple with the smallest and the largest date of the observation calendar
        """
        return min(self._observation_calendar), max(self._observation_calendar)

    def apply_custom_filter(self,
                            custom_eligibility_df: pd.DataFrame,
                            filter_desc: str = 'custom filter'):
        """
        Apply a user defined eligibility DataFrame as a filter.
        :param custom_eligibility_df: DataFrame whose columns equal the tickers (same order) and whose index is a
            monotonically increasing DatetimeIndex
        :param filter_desc: str describing the filter
        :raises ValueError: if the columns or the index do not fulfil the requirements above
        :return: None
        """
        custom_tickers = list(custom_eligibility_df)
        if custom_tickers != self.tickers:
            raise ValueError(
                'Column headers (i.e. tickers) are not the same.'
                '\nTickers in current investment universe: {}'
                '\nTickers in custom filter: {}'.format(
                    ', '.join(self.tickers), ', '.join(custom_tickers)))
        custom_index = custom_eligibility_df.index
        if not (custom_index.is_monotonic_increasing and
                isinstance(custom_index, pd.DatetimeIndex)):
            raise ValueError(
                'Index needs to be a monotonically increasing DatetimeIndex.')
        self._apply_dataframe(custom_eligibility_df, filter_desc)

    def _apply_dataframe(self, df: pd.DataFrame, filter_desc: str):
        """
        Record the filter description and combine the filter with the current eligibility DataFrame.
        :param df: DataFrame with the new filter
        :param filter_desc: str describing the filter
        :return: None
        """
        self._filter_desc_list.append(filter_desc)
        # merge (as of) the new filter to the current observation calendar
        new_filter = merge_two_dataframes_as_of(
            pd.DataFrame(index=self.observation_calendar), df)
        if self._filter_has_been_applied:
            # multiply so that a ticker stays eligible only where every filter so far is non-zero
            self._eligibility_df = self._eligibility_df * new_filter.values
        else:
            self._eligibility_df = new_filter
        self._filter_has_been_applied = True

    def get_eligible_tickers(self) -> list:
        """
        Return a list with all tickers that has at least one 1 in their eligibility column i.e. the stocks that has
        passed the filters at least once.
        :return: list
        """
        has_been_eligible = self._eligibility_df.sum().gt(0)
        return list(has_been_eligible[has_been_eligible].index)

    # ------------------------------------------------------------------------------------------------------------------
    # filter methods
    def apply_liquidity_filter(self,
                               avg_lag: int,
                               liquidity_threshold: float,
                               currency: str = None):
        """
        A ticker is eligible when its rolling average liquidity is strictly larger than the threshold.
        :param avg_lag: int, passed to rolling_average as the averaging lag (needs to be at least 1)
        :param liquidity_threshold: float
        :param currency: str, passed on to the database when loading liquidity
        :raises ValueError: if avg_lag is smaller than 1
        :return: None
        """
        if avg_lag < 1:
            raise ValueError(
                'avg_lag needs to be an int larger or equal to 1.')
        liquidity_data = self._get_liquidity_data(lag=avg_lag,
                                                  currency=currency)
        avg_liquidity = rolling_average(liquidity_data, avg_lag)
        liquidity_eligibility = pd.DataFrame(
            data=np.where(avg_liquidity > liquidity_threshold, 1, 0),
            index=avg_liquidity.index,
            columns=avg_liquidity.columns)
        if currency is None:
            currency = ''
        self._apply_dataframe(
            liquidity_eligibility,
            '{} day avg. liquidity > {} {}'.format(avg_lag, currency.upper(),
                                                   liquidity_threshold))

    def apply_close_price_history_filter(
            self,
            minimum_number_consecutive_published_prices: int,
            tolerance: float = 0.95):
        """
        A ticker is eligible on a date when at least int(tolerance x minimum_number_consecutive_published_prices)
        close prices are available in the rolling window ending on that date.
        :param minimum_number_consecutive_published_prices: int, length of the rolling window
        :param tolerance: float, share of the window that needs to have a price
        :return: None
        """
        closing_price_data = self._get_closing_price_data(
            lag=minimum_number_consecutive_published_prices)

        # is NaN only when there is less than minimum_number_consecutive_published_prices x tolerance available prices
        rolling_avg_df = closing_price_data.rolling(
            window=minimum_number_consecutive_published_prices,
            min_periods=int(
                tolerance *
                minimum_number_consecutive_published_prices)).mean()
        price_history_eligibility = pd.DataFrame(
            np.where(rolling_avg_df.isna(), 0, 1),
            index=rolling_avg_df.index,
            columns=rolling_avg_df.columns)
        self._apply_dataframe(
            price_history_eligibility,
            '{}% of prices has been published for the past {} days'.format(
                tolerance * 100, minimum_number_consecutive_published_prices))

    def apply_published_close_price_filter(
            self, max_number_days_since_publishing: int):
        """
        A ticker is eligible on a date when at least one close price exists in the rolling window ending on that
        date and a complete window of prices has been observed at some point up to that date.
        :param max_number_days_since_publishing: int, length of the rolling window
        :return: None
        """
        closing_price_data = self._get_closing_price_data(
            lag=max_number_days_since_publishing)
        # first avg is calculated to check the availability at the start of the data (in case you observe at the start
        # of the available data)
        strict_rolling_avg_df = closing_price_data.rolling(
            window=max_number_days_since_publishing).mean()
        # ffill replaces the deprecated fillna(method='ffill')
        strict_rolling_avg_df.ffill(inplace=True)
        # is NaN only when there is not a single value within the given period
        rolling_avg_df = closing_price_data.rolling(
            window=max_number_days_since_publishing, min_periods=1).mean()
        # NaN in either average propagates through the product
        rolling_avg_df *= strict_rolling_avg_df.values
        price_availability_eligibility = pd.DataFrame(
            np.where(rolling_avg_df.isna(), 0, 1),
            index=rolling_avg_df.index,
            columns=rolling_avg_df.columns)
        self._apply_dataframe(
            price_availability_eligibility,
            'price published for the past {} days.'.format(
                max_number_days_since_publishing))

    # ------------------------------------------------------------------------------------------------------------------
    # get setter methods
    def get_eligibility_df(self, only_eligibile_tickers: bool = False):
        """
        Return the eligibility DataFrame with every 0 replaced by N/A.
        :param only_eligibile_tickers: bool, if True only the columns from get_eligible_tickers are returned
        :raises ValueError: if no filter has been applied, or if only_eligibile_tickers is True and no ticker is
            eligible
        :return: DataFrame
        """
        if not self._filter_has_been_applied:
            raise ValueError('No filter has been applied yet.')
        if not only_eligibile_tickers:
            return self._eligibility_df.replace(0, np.nan)
        eligible_tickers = self.get_eligible_tickers()
        if not eligible_tickers:
            raise ValueError('No tickers passed the filter: %s' %
                             ', '.join(self._filter_desc_list))
        return self._eligibility_df[eligible_tickers].replace(0, np.nan)

    def _get_closing_price_data(self, lag: int) -> pd.DataFrame:
        """
        Load close prices for the tickers, starting lag + 10 business days before the first observation date.
        :param lag: int, needs to be at least 1
        :raises ValueError: if lag is smaller than 1
        :return: DataFrame
        """
        if lag < 1:
            raise ValueError(
                'lag when loading prices needs to be an int larger or equal to 1.'
            )
        start_date, end_date = self.get_start_end_dates()
        closing_price_data = self._financial_database_handler.get_close_price_df(
            self.tickers, start_date - BDay(lag + 10), end_date)
        return closing_price_data

    def _get_liquidity_data(self, lag: int, currency: {str,
                                                       None}) -> pd.DataFrame:
        """
        Load liquidity for the tickers, starting lag + 10 business days before the first observation date.
        :param lag: int, needs to be at least 1
        :param currency: str or None, passed on to the database
        :raises ValueError: if lag is smaller than 1
        :return: DataFrame
        """
        if lag < 1:
            raise ValueError(
                'lag when loading liquidity needs to be an int larger or equal to 1.'
            )
        start_date, end_date = self.get_start_end_dates()
        liquidity_data = self._financial_database_handler.get_liquidity_df(
            self.tickers, start_date - BDay(lag + 10), end_date, currency)
        return liquidity_data

    @property
    def observation_calendar(self):
        return self._observation_calendar

    @observation_calendar.setter
    def observation_calendar(self, observation_calendar: pd.DatetimeIndex):
        """
        Check if the observation calendar is a monotonically increasing DatetimeIndex. Reset the eligibility
        DataFrame and the filter state.
        :param observation_calendar:DatetimeIndex
        :raises ValueError: if observation_calendar is not a monotonically increasing DatetimeIndex
        :return: None
        """
        # check the type first so that other objects raise ValueError instead of AttributeError
        if not (isinstance(observation_calendar, pd.DatetimeIndex) and
                observation_calendar.is_monotonic_increasing):
            raise ValueError(
                'observation_calendar needs to be a DatetimeIndex that is monotonic increasing.'
            )
        self._observation_calendar = observation_calendar
        self._eligibility_df = pd.DataFrame(columns=self._tickers,
                                            index=observation_calendar)
        self._filter_desc_list = []
        # the next filter must replace the empty eligibility DataFrame instead of being multiplied into it
        self._filter_has_been_applied = False

    @property
    def tickers(self):
        return self._tickers

    @tickers.setter
    def tickers(self, tickers: {str, list, tuple}):
        """
        Convert to list if ticker is str and make every ticker upper case. Reset the eligibility DataFrame and the
        filter state.
        :param tickers: str, list, tuple
        :raises ValueError: if tickers is not a str, list or tuple
        :return:
        """
        if isinstance(tickers, str):
            tickers = [tickers]
        elif not isinstance(tickers, (list, tuple)):
            raise ValueError('tickers needs to be a string, list and tuple.')
        self._tickers = [ticker.upper() for ticker in tickers]
        self._eligibility_df = pd.DataFrame(columns=self._tickers,
                                            index=self.observation_calendar)
        self._filter_desc_list = []
        # the next filter must replace the empty eligibility DataFrame instead of being multiplied into it
        self._filter_has_been_applied = False

    def get_desc(self):
        """
        :return: str with the comma separated descriptions of the applied filters, or 'no filter'
        """
        if self._filter_desc_list:
            return ', '.join(self._filter_desc_list)
        return 'no filter'

    def __repr__(self):
        return '<InvestmentUniverse(filter={})>'.format(self.get_desc())