Python cast_dataframe示例，qf_lib.containers.dataframe.cast_dataframe.cast_dataframe Python示例

示例#1

0

显示文件

文件： abstract_price_data_provider.py 项目： ajmal017/qf-lib

    def get_price(self, tickers: Union[Ticker, Sequence[Ticker]], fields: Union[PriceField, Sequence[PriceField]],
                  start_date: datetime, end_date: datetime = None, frequency: Frequency = Frequency.DAILY) -> \
            Union[None, PricesSeries, PricesDataFrame, QFDataArray]:
        """
        Fetches historical prices through ``get_history`` and wraps them in a prices-specific container.

        Parameters
        ----------
        tickers
            a single ticker or a sequence of tickers
        fields
            a single PriceField or a sequence of PriceFields
        start_date
            first date of the requested period
        end_date
            last date of the requested period
        frequency
            frequency of the requested data

        Returns
        -------
        PricesSeries (single field, single ticker),
        PricesDataFrame (single field and many tickers, or many fields and a single ticker - in the latter case
        the columns are PriceFields),
        or the container returned by get_history with its ``fields`` coordinate replaced by PriceFields
        (many fields, many tickers)

        Raises
        ------
        NotImplementedError
            if the frequency is not greater than Frequency.DAILY and start_date equals end_date
        """
        # A query is considered a "single date" one only for frequencies which do not compare greater than DAILY.
        if not frequency > Frequency.DAILY and start_date and start_date == end_date:
            raise NotImplementedError("Single date queries are not supported yet")

        field_names = self._map_field_to_str(tickers, fields)
        container = self.get_history(tickers, field_names, start_date, end_date, frequency)

        if self._is_single_price_field(fields):
            # Single field: only the container type needs to be changed
            if self._is_single_ticker(tickers):
                return cast_series(container, PricesSeries)
            return cast_dataframe(container, PricesDataFrame)

        # Many fields: string identifiers of the fields have to be translated back into PriceFields
        field_by_name = self.str_to_price_field_map(self._get_first_ticker(tickers))

        if not self._is_single_ticker(tickers):
            # Many fields and many tickers - swap the values of the "fields" coordinate
            price_fields = [field_by_name[name] for name in container.fields.values]
            return container.assign_coords(fields=price_fields)

        # Many fields and single ticker - swap the column labels of the PricesDataFrame
        prices_df = cast_dataframe(container, PricesDataFrame)
        prices_df.rename(columns={name: field_by_name[name] for name in prices_df.columns}, inplace=True)
        return prices_df

示例#2

0

显示文件

文件： rolling_contracts_series_producer.py 项目： mborraty/qf-lib

    def _get_single_rolling_contract_info(
            self, real_contracts_prices_da: PricesDataFrame, rolling_dates: pd.DatetimeIndex, contract_number: int) \
            -> RollingContractData:
        """
        Builds the prices, returns and time-to-expiration series of a single rolling contract by chaining
        consecutive real contracts.

        For the i-th rolling date the contract at position ``contract_number - 1 + i`` is used. Its data is
        restricted with ``self._filter_dates`` to the period which starts at that rolling date and ends
        at the next rolling date (or at ``self.timer.now()`` for the last period).

        Parameters
        ----------
        real_contracts_prices_da
            prices of the real contracts. NOTE(review): annotated as PricesDataFrame, but it is indexed with
            three axes and exposes ``tickers`` - looks like a 3-D data array; confirm and fix the annotation.
        rolling_dates
            dates on which the contract is rolled
        contract_number
            1-based number of the rolling contract (1 selects the contract at position 0 on the first rolling date)

        Returns
        -------
        RollingContractData
            concatenated prices, time to expiration and returns of the rolling contract
        """
        now = self.timer.now()

        # partial results, one entry per rolling period; they are concatenated after the loop
        prices_df_list = []
        returns_tms_list = []
        time_to_expiration_tms_list = []

        last_rolling_date_idx = len(rolling_dates) - 1
        tickers = real_contracts_prices_da.tickers.values
        for i, start_date in enumerate(rolling_dates):
            # a period lasts until the next rolling date; the last period is open-ended and lasts until now
            if i < last_rolling_date_idx:
                end_date = rolling_dates[i + 1]
            else:
                end_date = now

            # every roll moves one contract further
            front_contract_idx = contract_number - 1 + i
            front_contract_da = real_contracts_prices_da[:, front_contract_idx, :]

            front_contract_df = front_contract_da.to_pandas()
            front_contract_df = cast_dataframe(front_contract_df, PricesDataFrame)

            front_contract_df = front_contract_df.dropna()
            front_contract_df.name = tickers[front_contract_idx]

            partial_prices_df, partial_tte_tms = self._filter_dates(front_contract_df, start_date, end_date)

            # returns are calculated before the first row is trimmed below, so the return
            # on the day following the rolling date is still based on the rolling date's price
            close_prices_tms = partial_prices_df.loc[:, PriceField.Close]
            partial_returns_df = close_prices_tms.to_simple_returns()

            # remove price and first time to expiration for every contract except for the first one. Otherwise there
            # would be two data points on rolling dates

            # Think if the following code shouldn't be removed,
            # so that there would be 2 data points on rolling dates
            if i > 0:
                partial_prices_df = partial_prices_df.iloc[1:]
                partial_tte_tms = partial_tte_tms.iloc[1:]

            # Prices of EVERY period (including the first one) must be collected. Appending only for i > 0 dropped
            # the first contract's prices and made pd.concat fail when there was just one rolling date.
            prices_df_list.append(partial_prices_df)
            time_to_expiration_tms_list.append(partial_tte_tms)
            returns_tms_list.append(partial_returns_df)

        prices_df = pd.concat(prices_df_list, axis=0)
        prices_df = cast_dataframe(prices_df, PricesDataFrame)
        time_to_expiration_tms = pd.concat(time_to_expiration_tms_list, axis=0)  # type: pd.Series
        returns_tms = pd.concat(returns_tms_list, axis=0)
        returns_tms = cast_series(returns_tms, ReturnsSeries)

        # set names for series
        self._set_series_names(contract_number, prices_df, returns_tms, time_to_expiration_tms)

        return RollingContractData(prices_df, time_to_expiration_tms, returns_tms)

示例#3

0

显示文件

    def rolling_time_window(
            self, window_length: int, step: int, func: Callable[[Union["QFDataFrame", np.ndarray]], "QFSeries"]) \
            -> Union[None, "QFSeries", "QFDataFrame"]:
        """
        Runs a given function on each rolling window in the dataframe. The content of a rolling window is also
        a QFDataFrame thus the function which should be applied should accept a QFDataFrame as an argument.

        The function may return either a QFSeries (then the output of rolling_time_window will be QFDataFrame)
        or a scalar value (then the output of rolling_time_window will be QFSeries).

        The rolling window is moved along the time index (rows). The first window ends at the last row and each
        following window ends ``step`` rows earlier; every result is labelled with the index value of the last
        row of its window.

        Parameters
        ----------
        window_length
            number of rows which should be taken into rolling window
        step
            number of rows by which rolling window should be moved; must be at least 1
        func
            function to apply on each rolling window. If it returns a QFSeries then the output of rolling_time_window()
            will be a QFDataFrame; if it returns a scalar value, the return value of rolling_time_window() will
            be a QFSeries

        Returns
        -------
        None (if the result of running the rolling window was empty)
        or QFSeries (if the function applied returned scalar value for each window)
        or QFDataFrame (if the function applied returned QFSeries for each window)

        Raises
        ------
        ValueError
            if step is smaller than 1
        """
        # A non-positive step would never move the window towards the beginning of the data,
        # so the loop below would never terminate.
        if step < 1:
            raise ValueError("step must be a positive integer, got {}".format(step))

        results_dict = dict()  # type: Dict[datetime, pd.Series]
        end_idx = self.num_of_rows

        # windows are generated backwards: from the end of the data towards its beginning
        while True:
            start_idx = end_idx - window_length
            if start_idx < 0:
                # not enough rows left to build a full window
                break

            patch = self.iloc[start_idx:end_idx, :]
            end_date = self.index[end_idx - 1]
            results_dict[end_date] = func(patch)

            end_idx -= step

        if not results_dict:
            return None

        # the type of the first result decides about the type of the output container
        first_element = next(iter(results_dict.values()))  # type: "QFSeries"

        if isinstance(first_element, pd.Series):
            # NOTE(review): unlike the series branch below, this result is not explicitly sorted by index
            result = QFDataFrame.from_dict(results_dict, orient='index')
            result = cast_dataframe(result, QFDataFrame)
        else:
            from qf_lib.containers.series.qf_series import QFSeries
            dates, values = zip(*results_dict.items())
            result = QFSeries(index=dates, data=values)
            # results were collected from the latest window to the earliest one
            result = result.sort_index()

        return result

示例#4

0

显示文件

    def exponential_average(self, lambda_coeff: float = 0.94) -> "QFDataFrame":
        """
        Smooths every column of the dataframe with an exponential average.

        Parameters
        ----------
        lambda_coeff
            lambda coefficient; it is turned into a per-column list by ``_prepare_value_per_column_list``

        Returns
        -------
        dataframe of smoothed columns, cast to the type returned by ``self._constructor``
        """
        coefficient_per_column = self._prepare_value_per_column_list(lambda_coeff)
        coefficient_iter = self._get_iterator_for_pandas(coefficient_per_column)

        def smooth_column(column):
            # every call consumes the next coefficient from the shared iterator
            return column.exponential_average(next(coefficient_iter))

        return cast_dataframe(self.apply(smooth_column, axis=0), self._constructor)

示例#5

0

显示文件

文件： data_model.py 项目： espiney/qf-lib

 def _setup_r_square_of_each_predictor(self):
     """
     Computes ``1 - 1 / d`` for every regressor, where ``d`` is the corresponding diagonal element of the inverted
     correlation matrix of the regressors, and stores the values (indexed by a copy of the regressors' column
     labels) in ``self.r_squared_of_each_predictor``.
     """
     predictors_df = self.input_data.regressors_df
     correlations_df = cast_dataframe(predictors_df.corr(), output_type=QFDataFrame)

     # the original code names the diagonal of the inverted correlation matrix "vif"
     inverse_corr_diagonal = np.diagonal(inv(correlations_df))
     r_squared = 1 - (1 / inverse_corr_diagonal)

     self.r_squared_of_each_predictor = QFSeries(data=r_squared, index=predictors_df.columns.copy())

示例#6

0

显示文件

文件： drawdown_tms.py 项目： quarkfin/qf-lib

def drawdown_tms(input_data: InputData,
                 frequency: Frequency = None) -> InputData:
    """
    Calculates the drawdown for every date of the input data.

    Parameters
    ----------

    input_data: QFSeries, QFDataFrame
        QF timeseries or multiple timeseries grouped into a DataFrame

    frequency: Frequency
        optional parameter that improves the performance of the function as
        to_prices does not need to infer the frequency

    Returns
    -------
    QFSeries, QFDataFrame
        drawdown for each date, defined as one minus the ratio of the price on that date to the maximal price
        observed up to (and including) that date
    """
    prices = input_data.to_prices(frequency=frequency)

    # running maximum of the prices, i.e. the highest price seen so far for each date
    running_peak = prices.cummax()
    drawdowns = 1 - prices / running_peak

    # the output container mirrors the kind of the input (series vs. dataframe)
    if not isinstance(input_data, QFSeries):
        return cast_dataframe(drawdowns, QFDataFrame)
    return cast_series(drawdowns, QFSeries)

示例#7

0

显示文件

 def _setup_correlations(self, fitted_tms):
     """
     Stores in ``self.correlation_matrix`` the correlation matrix (as a QFDataFrame) of the fitted series,
     the regressors and the analysed series, joined column-wise in that order.
     """
     analysed = self.input_data.analysed_tms
     regressors = self.input_data.regressors_df

     combined_df = pd.concat([fitted_tms, regressors, analysed], axis=1)
     pairwise_correlations = combined_df.corr()

     self.correlation_matrix = cast_dataframe(pairwise_correlations, output_type=QFDataFrame)

示例#8

0

显示文件

def cast_dataframe_to_proper_type(result):
    """
    Casts a container to the QF type matching its number of axes.

    Parameters
    ----------
    result
        object exposing an ``axes`` attribute

    Returns
    -------
    QFSeries for one axis, QFDataFrame for two axes; for any other number of axes the input is returned unchanged
    """
    axes_count = len(result.axes)

    if axes_count == 2:
        return cast_dataframe(result, QFDataFrame)
    if axes_count == 1:
        return cast_series(result, QFSeries)

    # neither a series nor a dataframe - nothing to cast
    return result

示例#9

0

显示文件

    def _get_historical_data(self, exposures_tms, prices_array, ticker):
        """
        Joins the exposures with the Open, Low and High prices of the given ticker.

        Parameters
        ----------
        exposures_tms
            exposures; placed as the first part of the output
        prices_array
            container indexed with ``.loc[:, ticker, :]`` whose selection is convertible with ``to_pandas()``
        ticker
            ticker for which the prices are selected

        Returns
        -------
        result of concatenating (along columns) the exposures and the Open, Low and High prices, in that order
        """
        ticker_prices_df = cast_dataframe(prices_array.loc[:, ticker, :].to_pandas(), PricesDataFrame)

        price_columns = [
            ticker_prices_df.loc[:, price_field]
            for price_field in (PriceField.Open, PriceField.Low, PriceField.High)
        ]

        return pd.concat([exposures_tms, *price_columns], axis=1)

示例#10

0

显示文件

    def test_is_equal_risk_contribution(self):
        """
        Checks is_equal_risk_contribution on two identical assets: weights 0.25/0.75 must be reported as not equal
        risk contribution, weights 0.5/0.5 as equal risk contribution.
        """
        # two copies of the very same asset, labelled 'a' and 'b'
        single_asset_tms = self.factors_df.loc[:, 'a']
        duplicated_df = cast_dataframe(pd.concat([single_asset_tms, single_asset_tms], axis=1),
                                       SimpleReturnsDataFrame)
        duplicated_df.columns = ['a', 'b']
        covariance = duplicated_df.cov()

        uneven_weights = pd.Series([0.25, 0.75], index=self.factors_df.columns)
        self.assertFalse(RiskContributionAnalysis.is_equal_risk_contribution(covariance, uneven_weights))

        even_weights = pd.Series([0.5, 0.5], index=self.factors_df.columns)
        self.assertTrue(RiskContributionAnalysis.is_equal_risk_contribution(covariance, even_weights))

示例#11

0

显示文件

    def _calculate_portfolio_returns_tms(self, tickers, open_to_open_returns_df: QFDataFrame,
                                         exposure_values_df: QFDataFrame) \
            -> SimpleReturnsSeries:
        """
        Calculates the SimpleReturnsSeries of the portfolio. The value for each date is the portfolio performance
        over the last open-to-open period, e.g. the value indexed as 2010-02-15 refers to the portfolio value change
        between the open at 14th and the open at 15th and is based on the signal from 2010-02-13.

        The first index of the series is the Day 3 of the backtest, as the first signal calculation occurs
        after Day 1 (see the timeline below). The last index of the series is test_end_date and the portfolio
        exposure is being set to zero on the opening of the test_end_date.

        Timeline:

        -- Day 1 --
        the signal is generated, based on the historic data INCLUDING prices from Day 1;
        the suggested exposure for Day 2 is calculated

        -- Day 2 --
        on the opening of the day a trade is entered, held or exited (or nothing happens),
        according to the suggested exposure

        -- Day 3 --
        at the opening the open-to-open return is calculated, which makes it possible to estimate the current
        portfolio value; the simple return of the portfolio (Day 3 to Day 2) is saved and indexed with Day 3 date
        """
        returns_df = open_to_open_returns_df.dropna(how="all")

        # an exposure affects the return indexed two rows later; the first two rows carry no exposure after the shift
        delayed_exposures_df = exposure_values_df.shift(2, axis=0).iloc[2:]

        strategies_returns_df = (delayed_exposures_df * returns_df).dropna(axis=0, how='all')
        strategies_returns_df = cast_dataframe(
            strategies_returns_df, SimpleReturnsDataFrame)  # type: SimpleReturnsDataFrame

        # for strategies based on more than one ticker (ex. VolLongShort) use the line below:
        # weights = QFSeries(np.ones(daily_returns_of_strategies_df.num_of_columns))
        equal_weights = Portfolio.one_over_n_weights(tickers)

        portfolio_returns_tms, _ = Portfolio.constant_weights(strategies_returns_df, equal_weights)
        return portfolio_returns_tms

示例#12

0

显示文件

    def to_prices(self,
                  initial_prices: Sequence[float] = None,
                  suggested_initial_date: Union[datetime, int, float] = None,
                  frequency: Frequency = None) -> "PricesDataFrame":
        """
        Converts a dataframe to the dataframe of prices by converting each of its columns with the column's own
        ``to_prices`` method.

        The dataframe of prices returned will have an extra date at the beginning (in comparison to the returns'
        dataframe), because the return for the first date of a prices timeseries cannot be calculated and is
        therefore missing from the returns. The distance between that extra "initial date" and the rest of the dates
        can be inferred from the returns' dataframe or calculated using the frequency passed as the optional
        argument.

        Parameters
        ----------
        initial_prices
            initial price for all timeseries. If no prices are specified, then they will be assumed to be 1. If only one
            value is passed (instead of a list with values for each column), then the initial price will be the same
            for each series contained within the dataframe.
        suggested_initial_date
            the first date or initial value for the prices series. It won't be necessarily the first date of the price
            series (e.g. if the method is run on the PricesDataFrame then it won't be used).
        frequency
            the frequency of the returns' timeseries. It is used to infer the initial date for the prices series.

        Returns
        -------
        prices
            dataframe of prices
        """
        price_per_column = self._prepare_value_per_column_list(initial_prices)
        price_iter = self._get_iterator_for_pandas(price_per_column)

        def column_to_prices(column):
            # every call consumes the next initial price from the shared iterator
            return column.to_prices(
                initial_price=next(price_iter),
                suggested_initial_date=suggested_initial_date,
                frequency=frequency)

        prices_df = self.apply(column_to_prices, axis=0)

        # imported here instead of at the top of the module (presumably to avoid a circular import)
        from qf_lib.containers.dataframe.prices_dataframe import PricesDataFrame
        return cast_dataframe(prices_df, PricesDataFrame)

示例#13

0

显示文件

文件： risk_parity_boxes.py 项目： quarkfin/qf-lib

    def _get_assets_data(self, end_date, start_date, frequency):
        """
        Downloads Close prices of all tickers and converts them into simple returns.

        Parameters
        ----------
        end_date
            last date of the requested period
        start_date
            first date of the requested period
        frequency
            frequency of the requested prices

        Returns
        -------
        simple returns of the assets, calculated on prices trimmed to the range returned by
        ``get_common_start_and_end`` and forward filled
        """
        # download data
        asset_prices_df = self.bbg_data_provider.get_price(
            self.all_tickers, PriceField.Close, start_date, end_date, frequency)
        asset_prices_df = cast_dataframe(asset_prices_df, output_type=PricesDataFrame)

        # trim
        common_start, common_end = get_common_start_and_end(asset_prices_df)
        trimmed_asset_prices_df = asset_prices_df.loc[common_start:common_end, :]  # type: PricesDataFrame

        # remove intermediate NaNs by propagating the last valid price forward;
        # ffill() is the non-deprecated equivalent of fillna(method='pad')
        trimmed_asset_prices_df = trimmed_asset_prices_df.ffill()

        # convert to simple returns
        assets_rets = trimmed_asset_prices_df.to_simple_returns()

        return assets_rets

示例#14

0

显示文件

    def make_stats(self, initial_risks: Sequence[float],
                   scenarios_list: Sequence[QFDataFrame]) -> QFDataFrame:
        """
        Builds a table with the fractions of scenarios that failed (reached the maximal accepted drawdown) and
        that succeeded (reached the target return without failing), for each initial risk value.

        Parameters
        ----------
        initial_risks: Sequence[float]
            list of initial_risk parameters where initial_risk is a float number
        scenarios_list: Sequence[pandas.DataFrame]
            list with scenarios (QFDataFrame) where each DataFrame corresponds to one initial_risk value
            Each DataFrame has columns corresponding to different scenarios and it is indexed by Trades' ordinal
            number. Its values are returns of Trades.

        Returns
        -------
        pandas.DataFrame
            DataFrame indexed with initial_risk values and with columns FAILED (fraction of scenarios that failed)
            and SUCCEEDED (fraction of scenarios that met the objective and didn't fail on the way)
        """
        stats_columns = [self.FAILED, self.SUCCEEDED]
        result = QFDataFrame(index=pd.Index(initial_risks),
                             columns=pd.Index(stats_columns),
                             dtype=np.float64)

        for init_risk, scenarios in zip(initial_risks, scenarios_list):
            returns_df = cast_dataframe(scenarios, SimpleReturnsDataFrame)  # type: SimpleReturnsDataFrame

            # one boolean flag per scenario (column)
            drawdown_breached = max_drawdown(returns_df) >= self._max_accepted_dd
            target_reached = returns_df.total_cumulative_return() >= self._target_return
            succeeded = ~drawdown_breached & target_reached

            # turn the counts into fractions of all scenarios
            scenarios_count = returns_df.num_of_columns
            failed_fraction = drawdown_breached.sum() / scenarios_count
            succeeded_fraction = succeeded.sum() / scenarios_count

            result.loc[init_risk, stats_columns] = [failed_fraction, succeeded_fraction]

        return result

示例#15

0

显示文件

    def to_log_returns(self) -> "LogReturnsDataFrame":
        """
        Converts the dataframe, column by column, to the dataframe of logarithmic returns. First date of prices
        won't be present in the returns dataframe.

        Returns
        -------
        returns_df
            dataframe of log returns
        """
        from qf_lib.containers.dataframe.log_returns_dataframe import LogReturnsDataFrame

        # the conversion is delegated to the series type which this dataframe uses for its columns
        column_converter = self._constructor_sliced.to_log_returns
        return cast_dataframe(self.apply(column_converter, axis=0), LogReturnsDataFrame)

示例#16

0

显示文件

    def to_simple_returns(self) -> "SimpleReturnsDataFrame":
        """
        Converts the dataframe, column by column, to the dataframe of simple returns. First date of prices
        won't be present in the returns timeseries.

        Returns
        -------
        returns_df
            dataframe of simple returns
        """
        from qf_lib.containers.dataframe.simple_returns_dataframe import SimpleReturnsDataFrame

        # the conversion is delegated to the series type which this dataframe uses for its columns
        column_converter = self._constructor_sliced.to_simple_returns
        return cast_dataframe(self.apply(column_converter, axis=0), SimpleReturnsDataFrame)

示例#17

0

显示文件

def cast_data_array_to_proper_type(result: QFDataArray, use_prices_types=False):
    """
    Converts a data array to the container matching its number of dimensions.

    Parameters
    ----------
    result
        data array to convert
    use_prices_types
        if True, PricesSeries / PricesDataFrame are used as the output types; otherwise QFSeries / QFDataFrame

    Returns
    -------
    the single contained item for 0 dimensions, a series (which keeps the name of the array) for 1 dimension,
    a dataframe for 2 dimensions; for more dimensions the input is returned unchanged
    """
    if use_prices_types:
        series_type, data_frame_type = PricesSeries, PricesDataFrame
    else:
        series_type, data_frame_type = QFSeries, QFDataFrame

    dimensions_count = len(result.shape)

    if dimensions_count == 0:
        return result.item()

    if dimensions_count == 1:
        series = cast_series(result.to_pandas(), series_type)
        series.name = result.name
        return series

    if dimensions_count == 2:
        return cast_dataframe(result.to_pandas(), data_frame_type)

    # more than two dimensions - no pandas-based counterpart, keep the data array
    return result

示例#18

0

显示文件

    def min_max_normalized(
            self,
            original_min_values: Sequence[float] = None,
            original_max_values: Sequence[float] = None) -> "QFDataFrame":
        """
        Normalizes each column using min-max scaling: the data is mapped to the [0;1] range, so that 0 corresponds
        to the minimal value in the original series and 1 corresponds to the maximal value. It is also possible
        to specify the values which should correspond to 0 and 1 after the normalization, which is useful if the
        same normalization parameters are to be used for different data.

        Parameters
        ----------
        original_min_values
            values which should correspond to 0 after applying the normalization (one value for each column)
        original_max_values
            values which should correspond to 1 after applying the normalization (one value for each column)

        Returns
        -------
        normalized_dataframe
            dataframe of normalized values
        """
        # NOTE(review): min and max values are meant to be passed either both or not at all; this method does not
        # check that itself.
        lower_bounds = self._prepare_value_per_column_list(original_min_values)
        upper_bounds = self._prepare_value_per_column_list(original_max_values)

        lower_bounds_iter = self._get_iterator_for_pandas(lower_bounds)
        upper_bounds_iter = self._get_iterator_for_pandas(upper_bounds)

        def normalize_column(column):
            # every call consumes the next (min, max) pair from the shared iterators
            return column.min_max_normalized(next(lower_bounds_iter), next(upper_bounds_iter))

        return cast_dataframe(self.apply(normalize_column, axis=0), self._constructor)

示例#19

0

显示文件

 def _get_open_prices(self, prices_data_array):
     """
     Selects the PriceField.Open entry along the third axis of the prices data array and returns the selection
     as a PricesDataFrame.
     """
     open_prices_selection = prices_data_array.loc[:, :, PriceField.Open]
     return cast_dataframe(open_prices_selection.to_pandas(), PricesDataFrame)