Example #1
    def test_aggregate_by_year(self):
        dates = pd.DatetimeIndex(['2015-06-01', '2015-12-30', '2016-01-01', '2016-05-01'])
        test_dataframe = SimpleReturnsDataFrame(data=self.simple_returns_values, index=dates)

        expected_aggregated_rets = [[2.000000, 2.000000, 2.000000, 2.000000, 2.000000],
                                    [0.666666, 0.666666, 0.666666, 0.666666, 0.666666]]
        expected_dataframe = SimpleReturnsDataFrame(data=expected_aggregated_rets,
                                                    index=pd.DatetimeIndex(['2015-12-31', '2016-12-31']))

        actual_dataframe = test_dataframe.aggregate_by_year()

        assert_dataframes_equal(expected_dataframe, actual_dataframe)
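The expected values follow from compounding the simple returns within each calendar year: for 2015, (1 + 1.0) * (1 + 0.5) - 1 = 2.0, and for 2016, (1 + 0.333333) * (1 + 0.25) - 1 ≈ 0.666666, which is what aggregate_by_year() is expected to return for every column.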
Example #2
    def make_scenarios(self, trade_rets: Sequence[float], scenarios_length: int = 100, num_of_scenarios: int = 10000) \
            -> SimpleReturnsDataFrame:
        """
        Utility function to generate different trade scenarios, where each scenario is a series of returns for a given
        investment strategy.
        Scenarios of a given length are created by randomly sampling (with replacement) returns from the original
        sequence of Trade returns. The result is a SimpleReturnsDataFrame indexed by the Trade's ordinal number,
        with one scenario in each column.

        Parameters
        ----------
        trade_rets: Sequence[float]
            sequence of floats which represent the returns on Trades performed by some investment strategy
        scenarios_length: int
            number of Trades which should be simulated for each scenario
        num_of_scenarios: int
            number of scenarios which should be generated

        Returns
        -------
        SimpleReturnsDataFrame
            data frame of size scenarios_length (rows) by num_of_scenarios (columns), containing float values
        """
        values = np.random.choice(trade_rets,
                                  scenarios_length * num_of_scenarios)
        values = np.reshape(values, (scenarios_length, num_of_scenarios))

        return SimpleReturnsDataFrame(values)
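A minimal usage sketch for make_scenarios; `analyser` is a hypothetical instance of whatever class defines the method above, and the trade returns are made-up figures:

trade_rets = [0.02, -0.01, 0.03, -0.015, 0.005]  # hypothetical trade returns
scenarios_df = analyser.make_scenarios(trade_rets, scenarios_length=50, num_of_scenarios=1000)
# scenarios_df has 50 rows (one per simulated Trade) and 1000 columns (one per scenario)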
Example #3
    def setUp(self):
        self.dates = pd.date_range(start='2015-05-13', periods=5)
        self.column_names = ['a', 'b', 'c', 'd', 'e']

        self.prices_values = [[1., 1., 1., 1., 1.], [2., 2., 2., 2., 2.],
                              [3., 3., 3., 3., 3.], [4., 4., 4., 4., 4.],
                              [5., 5., 5., 5., 5.]]
        self.test_prices_df = PricesDataFrame(data=self.prices_values,
                                              index=self.dates,
                                              columns=self.column_names)

        self.log_returns_values = [
            [0.693147, 0.693147, 0.693147, 0.693147, 0.693147],
            [0.405465, 0.405465, 0.405465, 0.405465, 0.405465],
            [0.287682, 0.287682, 0.287682, 0.287682, 0.287682],
            [0.223144, 0.223144, 0.223144, 0.223144, 0.223144]
        ]
        self.test_log_returns_df = LogReturnsDataFrame(
            data=self.log_returns_values,
            index=self.dates[1:],
            columns=self.column_names)

        self.simple_returns_values = [
            [1.000000, 1.000000, 1.000000, 1.000000, 1.000000],
            [0.500000, 0.500000, 0.500000, 0.500000, 0.500000],
            [0.333333, 0.333333, 0.333333, 0.333333, 0.333333],
            [0.250000, 0.250000, 0.250000, 0.250000, 0.250000]
        ]
        self.test_simple_returns_df = SimpleReturnsDataFrame(
            data=self.simple_returns_values,
            index=self.dates[1:],
            columns=self.column_names)
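The three fixtures are consistent with each other: each simple return equals P_t / P_{t-1} - 1 (e.g. 2/1 - 1 = 1.0, 3/2 - 1 = 0.5), and each log return equals ln(P_t / P_{t-1}) (e.g. ln(2/1) ≈ 0.693147, ln(3/2) ≈ 0.405465), so both return frames are derived from the same price path 1, 2, 3, 4, 5.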
Example #4
def get_analysed_tms_and_regressors(dates_span: int = 1000, num_of_regressors: int = 7,
                                    start_date: datetime.datetime = str_to_date('2016-01-01'),
                                    mean_return: float = 0.001, std_of_returns: float = 0.02,
                                    a_coeff: float = -0.25, b_coeff: float = 1.25, intercept: float = 0.004)\
        -> Tuple[SimpleReturnsSeries, SimpleReturnsDataFrame]:
    """
    Creates a dataframe with simple returns of sample timeseries (regressors). Then creates a series which depends
    linearly on the first two regressors ('a' and 'b').
    """
    dates = pd.bdate_range(start=start_date, periods=dates_span)
    regressors_names = generate_sample_column_names(
        num_of_columns=num_of_regressors)
    # init the random number generator with a fixed seed, so that results are always the same
    np.random.seed(5)

    regressors_data = np.random.normal(mean_return, std_of_returns,
                                       (dates_span, num_of_regressors))
    regressors_df = SimpleReturnsDataFrame(data=regressors_data,
                                           index=dates,
                                           columns=regressors_names)

    analyzed_data = a_coeff * regressors_data[:, 0] + b_coeff * regressors_data[:, 1] + \
        np.random.normal(0, 0.02, dates_span) + intercept

    analysed_tms = SimpleReturnsSeries(data=analyzed_data,
                                       index=dates,
                                       name='Fund')

    return analysed_tms, regressors_df
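An illustrative call relying on the default parameters shown in the signature above:

analysed_tms, regressors_df = get_analysed_tms_and_regressors()
# regressors_df: 1000 business days x 7 regressors of normally distributed simple returns
# analysed_tms: a series driven by the first two regressors (coefficients -0.25 and 1.25),
# plus an intercept of 0.004 and additional Gaussian noise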
Example #5
def convert_dataframe_frequency(
        dataframe: QFDataFrame,
        frequency: Frequency) -> SimpleReturnsDataFrame:
    """
    Converts each column in the dataframe to the specified frequency.
    ValueError is raised when a column has a lower frequency than the one we are converting to.
    """
    # Verify that no column in the dataframe has a lower frequency than the target frequency.
    data_frequencies = dataframe.get_frequency()
    for column, col_frequency in data_frequencies.items():
        if col_frequency < frequency:
            raise ValueError(
                "Column '{}' cannot be converted to '{}' frequency because its frequency is '{}'."
                .format(column, frequency, col_frequency))

    if frequency == Frequency.DAILY:
        return dataframe.to_simple_returns()

    filled_df = dataframe.to_prices().fillna(method="ffill")
    new_columns = {}
    for column in filled_df:
        new_columns[column] = get_aggregate_returns(filled_df[column],
                                                    frequency)

    return SimpleReturnsDataFrame(new_columns)
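A hypothetical call, assuming `daily_returns_df` is a QFDataFrame of daily data and that the Frequency enum used above exposes a MONTHLY member:

monthly_returns_df = convert_dataframe_frequency(daily_returns_df, Frequency.MONTHLY)  # hypothetical inputs
# each column is forward-filled as prices and then aggregated into monthly simple returns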
Example #6
    def setUp(self):
        dates_span = 100
        regressor_names = ['a', 'b', 'c']

        dates = pd.date_range(start='2015-01-01', periods=dates_span, freq='D')

        fund_returns_tms = SimpleReturnsSeries(
            data=[i / 100 for i in range(1, dates_span + 1)], index=dates)
        deviation = 0.005
        fit_returns_tms = SimpleReturnsSeries(data=(fund_returns_tms.values +
                                                    deviation),
                                              index=dates)

        regressors_returns_df = SimpleReturnsDataFrame(
            data=np.array([
                fund_returns_tms, fund_returns_tms + deviation,
                fund_returns_tms - deviation
            ]).T,
            index=dates,
            columns=regressor_names)
        coefficients = QFSeries(index=regressor_names, data=[1.0, 1.0, 1.0])

        self.fund_returns_tms = fund_returns_tms
        self.fit_returns_tms = fit_returns_tms
        self.regressors_returns_df = regressors_returns_df
        self.coefficients = coefficients

        self.alpha = 0.005
Example #7
def generate_random_paths(sample_len: int,
                          sample_size: int,
                          mean: float,
                          std: float,
                          leverage: float = 1.0):
    """ Generates random paths.

    Parameters
    ----------
    sample_len: int
        length of each path of data, equivalent to time
    sample_size: int
        Number of paths simulated
    mean: float
        mean simple return
    std: float
        standard deviation of returns
    leverage: float
        leverage used in the simulated investment process

    Returns
    -------
    SimpleReturnsDataFrame
        indexed by steps with paths as columns
    """
    mean = mean * leverage
    std = std * leverage

    time = np.arange(1, 1 + sample_len)

    returns_vector = np.random.normal(loc=mean,
                                      scale=std,
                                      size=(sample_len * sample_size, 1))
    returns = np.reshape(returns_vector, (sample_len, sample_size))
    return SimpleReturnsDataFrame(data=returns, index=time)
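An illustrative call with made-up parameters:

paths_df = generate_random_paths(sample_len=252, sample_size=1000, mean=0.0005, std=0.01)
# paths_df is indexed by the step numbers 1..252 and holds one simulated path per column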
Example #8
    def setUp(self):
        portfolio_rets = [0.01, 0.02, -0.03, 0.04, -0.05, 0.06]
        asset_1_rets = [0.011, 0.035, -0.028, 0.039, -0.044, 0.061]
        asset_2_rets = [0.02, 0.04, -0.06, 0.08, -0.1, 0.12]
        dates = pd.date_range(start='2015-02-01', periods=6)

        self.portfolio_tms = SimpleReturnsSeries(portfolio_rets, dates)
        returns_array = np.array([asset_1_rets, asset_2_rets]).T
        self.factors_df = SimpleReturnsDataFrame(data=returns_array, index=dates, columns=['a', 'b'])
Example #9
    def _create_test_dataframe(cls):
        values = [[np.nan, 0.0, 0.0, 0.0, 0.0], [1.0, np.nan, 1.0, 1.0, 1.0],
                  [2.0, np.nan, np.nan, 2.0, 2.0],
                  [3.0, 3.0, 3.0, np.nan, 3.0], [4.0, 4.0, 4.0, 4.0, 4.0],
                  [5.0, 5.0, 5.0, 5.0, 5.0]]

        index = pd.date_range(start='2015-01-01', periods=6)
        columns = ['a', 'b', 'c', 'd', 'e']
        dataframe = SimpleReturnsDataFrame(data=values,
                                           index=index,
                                           columns=columns)

        return dataframe
Example #10
    def test_proxy_using_values(self):
        expected_values = [[0.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0],
                           [2.0, 0.0, 2.0, 2.0], [3.0, 3.0, 0.0, 3.0],
                           [4.0, 4.0, 4.0, 4.0], [5.0, 5.0, 5.0, 5.0]]
        expected_columns = ['a', 'c', 'd', 'e']
        expected_dates = self.test_dataframe.index.copy()
        expected_dataframe = SimpleReturnsDataFrame(data=expected_values,
                                                    columns=expected_columns,
                                                    index=expected_dates)
        self.data_cleaner.threshold = 0.2

        actual_dataframe = self.data_cleaner.proxy_using_value(proxy_value=0.0)

        assert_dataframes_equal(expected_dataframe, actual_dataframe)
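The expected values are consistent with a threshold of 0.2 meaning: columns with more than 20% of missing rows are dropped (column 'b' has two NaNs out of six), while the remaining NaNs are replaced with the supplied proxy_value of 0.0.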
Example #11
    def test_proxy_using_regression(self):
        expected_values = [[np.nan, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0],
                           [2.0, 2.0, 2.0, 2.0], [3.0, 3.0, 3.0, 3.0],
                           [4.0, 4.0, 4.0, 4.0], [5.0, 5.0, 5.0, 5.0]]
        expected_columns = ['a', 'c', 'd', 'e']
        expected_dates = self.test_dataframe.index.copy()
        expected_dataframe = SimpleReturnsDataFrame(data=expected_values,
                                                    columns=expected_columns,
                                                    index=expected_dates)
        self.data_cleaner.threshold = 0.2

        actual_dataframe = self.data_cleaner.proxy_using_regression(
            benchmark_tms=self.test_benchmark,
            columns_type=SimpleReturnsSeries)

        assert_dataframes_equal(expected_dataframe, actual_dataframe)
Example #12
    def form_different_is_and_oos_sets(
            self, multiple_returns_timeseries: QFDataFrame) -> Tuple:
        """
        Splits slices into two groups of equal sizes for all possible combinations.

        Returns a list of tuples. The 1st element of each tuple contains the In-Sample set and the 2nd one contains
        the Out-Of-Sample set (both in the form of QFDataFrames). Each tuple contains one of the possible combinations
        of slices forming the IS and OOS sets. E.g. if there are 4 slices A, B, C, D, then one possible combination
        is IS: A, B and OOS: C, D; this combination corresponds to one element of the result list.
        A and B will be concatenated into a single timeseries AB, and likewise C and D into a single timeseries CD.
        """
        # Drop all rows not aligned to num_of_slices. E.g. if the df has 233 rows and num_of_slices is 50,
        # then the last 33 rows of the original data frame will be dropped
        rows_to_keep = (multiple_returns_timeseries.num_of_rows //
                        self.num_of_slices) * self.num_of_slices
        aligned_df = multiple_returns_timeseries.iloc[:rows_to_keep]
        if aligned_df.empty:
            raise ValueError("Too few rows in the data frame.")

        size_of_slices = aligned_df.num_of_rows // self.num_of_slices
        df_slices = [
            SimpleReturnsDataFrame(aligned_df.iloc[i:i + size_of_slices, :])
            for i in range(0, len(aligned_df), size_of_slices)
        ]

        # The index order of the original data frame should be preserved in both IS and OOS dfs
        new_index = aligned_df.index[:len(aligned_df) // 2]
        is_dfs = [
            pd.concat(slices) for slices in itertools.combinations(
                df_slices, self.num_of_slices // 2)
        ]
        oos_dfs = [
            aligned_df.loc[aligned_df.index.difference(df.index)]
            for df in is_dfs
        ]

        # Adjust the indices at the end, after the oos_dfs and is_dfs are already computed
        for df in itertools.chain(is_dfs, oos_dfs):
            df.index = new_index

        return is_dfs, oos_dfs
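A small worked illustration of the combination logic described in the docstring, using itertools directly on slice labels (the labels are illustrative, not taken from the class):

import itertools

slices = ['A', 'B', 'C', 'D']                      # 4 slices, as in the docstring example
is_sets = list(itertools.combinations(slices, 2))
# [('A', 'B'), ('A', 'C'), ('A', 'D'), ('B', 'C'), ('B', 'D'), ('C', 'D')]
# For IS = ('A', 'B') the complementary OOS set is ('C', 'D'); concatenating each pair gives
# one IS timeseries AB and one OOS timeseries CD, as in the docstring example.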