Exemplo n.º 1
0
    def plot_stocks(self):
        """Plot the expected return against the volatility of each stock.

        Mean returns and volatilities are computed from ``self._data`` with
        ``self._freq`` periods. Every stock is drawn as one scatter point
        (x: volatility, y: mean return) on the current matplotlib axes and
        annotated with its label from the index of the mean-returns result.
        A single legend is added once all points have been annotated.
        """
        stock_returns = mean_returns(data=self._data, freq=self._freq)
        stock_volatility = volatility(data=self._data, freq=self._freq)

        # All stocks share one scatter series so the legend has one entry.
        plt.scatter(stock_volatility,
                    stock_returns,
                    marker="o",
                    s=100,
                    label="Stocks")

        # Pair the values positionally with zip() instead of indexing the
        # label-indexed results with an integer, which relies on a positional
        # fallback that pandas has deprecated.
        # assumes both results are 1-D and ordered identically -- TODO confirm
        for i, (txt, vol, ret) in enumerate(
                zip(stock_returns.index, stock_volatility, stock_returns)):
            plt.annotate(
                txt,
                (vol, ret),
                xytext=(10, 0),
                textcoords="offset points",
                label=i,
            )

        # Build the legend once, after the loop: calling it per annotation
        # was redundant and skipped it entirely for an empty portfolio.
        plt.legend()
Exemplo n.º 2
0
    def __init__(self, ticker, data: pd.DataFrame, **kwargs):
        """Compute summary statistics for a single ticker.

        :Input:
             :ticker: column label of the stock inside ``data``
             :data: ``pandas.DataFrame`` containing a column for ``ticker``;
                 frames with ``MultiIndex`` columns are passed through
                 ``clean_data`` first
             :kwargs: optional settings
                 ``risk_free_rate`` (default: 0.001),
                 ``freq`` (default: 252) and
                 ``type`` (default: ``'log'``, forwarded to ``mean_returns``)
        :Raises:
             :ValueError: if ``data`` is not a DataFrame or ``ticker`` is not
                 one of its columns
        """
        if not isinstance(data, pd.DataFrame):
            raise ValueError('data should be a pandas.DataFrame')

        # Rows where every column is missing carry no information.
        if isinstance(data.columns, pd.MultiIndex):
            self._data = clean_data(data).dropna(how="all")
        else:
            self._data = data.dropna(how="all")

        if ticker not in self._data.columns:
            raise ValueError(f'Ticker {ticker} is not provided in DataFrame')

        self._ticker = ticker
        # Keep a one-column DataFrame (not a Series) so the helpers below
        # return indexable results and ``.values[0]`` picks the single value.
        self._data = pd.DataFrame(self._data[ticker])

        self._risk_free_rate = kwargs.get('risk_free_rate', 0.001)
        self._freq = kwargs.get('freq', 252)
        self._type = kwargs.get('type', 'log')

        self._daily_returns = daily_log_returns(self._data)

        ##########PROPERTIES##########
        self._returns = mean_returns(self._data, freq=self._freq, type=self._type).values[0]
        # Use the same ``freq`` for every metric; previously only the mean
        # return honoured a custom frequency, so the ratios below mixed
        # differently scaled numerator and denominator.
        self._volatility = volatility(self._data, freq=self._freq).values[0]
        self._downside_volatility = downside_volatility(self._data, freq=self._freq).values[0]
        self._sharp = (self._returns - self._risk_free_rate)/self._volatility
        # Read the private field directly, like the Sharpe ratio above,
        # rather than going through the public ``returns`` accessor.
        self._sortino = (self._returns - self._risk_free_rate)/self._downside_volatility
        # NOTE(review): skew and kurtosis are taken over ``self._data`` itself,
        # not over ``self._daily_returns`` -- confirm this is intended.
        self._skew = self._data.skew().values[0]
        self._kurtosis = self._data.kurt().values[0]
Exemplo n.º 3
0
def ratios(data: pd.DataFrame, risk_free_rate=0.001, verbouse = False):
    """Build a per-stock table of return, risk and risk-adjusted ratios.

    :Input:
         :data: ``pandas.DataFrame`` handed unchanged to ``mean_returns``,
             ``volatility`` and ``downside_volatility``
         :risk_free_rate: ``float`` (default: 0.001) subtracted from the mean
             return before dividing by either volatility
         :verbouse: ``boolean`` (default: ``False``), whether to print the
             complete table without row/column truncation
    :Output:
         :df_results: ``pandas.DataFrame`` with the columns
             'Yearly mean returns', 'Volatility', 'Downside Volatility',
             'Sharp Ratio' and 'Sortino Ratio'
    """
    # Column label -> metric; insertion order is the column order.
    metrics = {
        'Yearly mean returns': mean_returns(data, type='log'),
        'Volatility': volatility(data),
        'Downside Volatility': downside_volatility(data),
    }

    # Inner join keeps only the labels present in all three metrics.
    summary = pd.concat(
        list(metrics.values()),
        keys=list(metrics.keys()),
        join='inner',
        axis=1,
    )

    excess_return = summary['Yearly mean returns'] - risk_free_rate
    summary['Sharp Ratio'] = excess_return / summary['Volatility']
    summary['Sortino Ratio'] = excess_return / summary['Downside Volatility']

    if verbouse:
        full_display = pd.option_context('display.max_rows', None,
                                         'display.max_columns', None)
        with full_display:
            print(summary)

    return summary
Exemplo n.º 4
0
def pf_valuation(weights, data: pd.DataFrame, risk_free_rate=0.001, freq=252):
    """Evaluate one portfolio: return, volatility, Sharpe and Sortino ratios.

    :Input:
         :weights: ``numpy.ndarray`` with one weight per column of ``data``,
             or a single-column ``pandas.DataFrame`` whose index contains the
             column labels of ``data``
         :data: ``pandas.DataFrame`` passed to ``cov_matrix``,
             ``mean_returns`` and ``downside_volatility``
         :risk_free_rate: ``float`` (default: 0.001)
         :freq: ``int`` (default: 252) forwarded to the helper functions
    :Output:
         ``dict`` with the keys 'Returns', 'Volatility', 'Sharp',
         'Downside volatility' and 'Sortino'
    :Raises:
         :ValueError: for a weights DataFrame with more than one column, an
             unsupported weights type, or a weight count that differs from
             the number of columns in ``data``
    """
    if isinstance(weights, pd.DataFrame):
        if len(weights.columns) > 1:
            raise ValueError(
                'Incorrect dataframe with weights provided. Expected 1 column with weights'
            )
        # Re-order the weights so they line up with the columns of ``data``.
        w = np.array(
            [weights.at[ticker, weights.columns[0]] for ticker in data.columns])
    elif isinstance(weights, np.ndarray):
        w = weights
    else:
        raise ValueError('Weights should be numpy ndarray or pd.DataFrame')

    n_assets = len(data.columns)
    if len(w) != n_assets:
        raise ValueError('Incorrect data or weights were provided')

    covariance = cov_matrix(data)
    asset_returns = mean_returns(data, freq=freq, type='log')
    asset_downside_vol = downside_volatility(data, freq=freq)

    pf_return = pf_mean_returns(w, asset_returns)
    pf_vol = pf_volatility(w, covariance, freq=freq)
    sharpe = (pf_return - risk_free_rate) / pf_vol

    pf_downside_vol = pf_negative_volatility(
        weights=w, stocks_yearly_downside_vol=asset_downside_vol)
    sortino = (pf_return - risk_free_rate) / pf_downside_vol

    result = {}
    result['Returns'] = pf_return
    result['Volatility'] = pf_vol
    result['Sharp'] = sharpe
    result['Downside volatility'] = pf_downside_vol
    result['Sortino'] = sortino
    return result
Exemplo n.º 5
0
    def __init__(self, data: pd.DataFrame, weights = None, risk_free_rate=0.0425, freq=252):
        """Validate the inputs and precompute covariance and mean returns.

        :Input:
             :data: ``pandas.DataFrame``; frames with ``MultiIndex`` columns
                 are passed through ``clean_data`` first
             :weights: array-like of weights, or ``None`` (default) for equal
                 weights over all columns of the stored data
             :risk_free_rate: ``float`` or ``int`` (default: 0.0425)
             :freq: positive ``int`` (default: 252)
        :Raises:
             :ValueError: if ``freq`` is not a positive integer,
                 ``risk_free_rate`` is not numeric, or ``data`` is not a
                 DataFrame
        """
        # --- frequency ---
        if not isinstance(freq, int):
            raise ValueError('Frequency must be an integer')
        if freq <= 0:
            raise ValueError('Freq must be > 0')
        self._freq = freq

        # --- risk free rate ---
        if not isinstance(risk_free_rate, (float, int)):
            raise ValueError('Risk free rate must be a float or an integer')
        self._risk_free_rate = risk_free_rate

        # --- price data ---
        if not isinstance(data, pd.DataFrame):
            raise ValueError('data should be a pandas.DataFrame')
        has_multi_columns = isinstance(data.columns, pd.MultiIndex)
        self._data = clean_data(data) if has_multi_columns else data

        # Result holders start empty; nothing in this method fills them.
        self._portfolios = self._min_vol_port = None
        self._min_downside_vol_port = None
        self._max_sharpe_port = self._max_sortino_port = None
        self._df_results = None

        # --- weights ---
        if weights is not None:
            self._weights = np.array(weights)
        else:
            n_assets = len(self._data.columns)
            self._weights = np.array([1. / n_assets for _ in range(n_assets)])

        self._cvm = cov_matrix(self._data)
        self._mr = mean_returns(self._data, freq=self._freq)
Exemplo n.º 6
0
# Disabled "sharp" counterpart of the analysis below; kept for reference only.
# sharp_data14_17 = clean_data(sharp_data14_17)
# pf_sharp14_17 = portfolio(data=sharp_data14_17, risk_free_rate=0.001, freq=252, num_portfolios=1000)
# pf_sharp14_17.plot_portfolios()
# pf_sharp14_17.print_results()
# pf_proposed_sharp14_17 = pf_sharp14_17.max_sharp_port

# Apply previously chosen Sortino weights to freshly downloaded price data
# and report the resulting weighted mean return.
# NOTE(review): pf_proposed_sortino14_17, pf_stocks_sortino, start_date17_20
# and end_date17_20 are not defined in this part of the script; presumably
# they come from an earlier step -- confirm.
print('=' * 80)
print('Sortino portfolio 17-20')
# One row per ticker so the weights can be joined with the returns by label.
df_weights_sortino = pd.DataFrame(pf_proposed_sortino14_17,
                                  index=pf_stocks_sortino)
sortino_data17_20 = download(source=Source.YFINANCE,
                             tickers=pf_stocks_sortino,
                             start_date=start_date17_20,
                             end_date=end_date17_20)
sortino_data17_20 = clean_data(sortino_data17_20)
stocks_yearly_returns = mean_returns(sortino_data17_20)
# Inner join: only tickers present in both the weights and the returns remain.
returns = pd.concat([df_weights_sortino, stocks_yearly_returns],
                    keys=['Weights', 'Yearly returns'],
                    join='inner',
                    axis=1)
# Drop the second column level so the columns can be addressed by the
# 'Weights' / 'Yearly returns' keys used below.
returns.columns = returns.columns.droplevel(1)
returns['Weighted return'] = returns['Weights'] * returns['Yearly returns']
# Portfolio return = sum of the per-ticker weighted returns.
ret17_20 = returns['Weighted return'].sum()
print(f'Yearly returns - 17 - 20 :{ret17_20}')

# Disabled "sharp" variant of the block above.
# print('=' * 80)
# print('Sharp portfolio 17-20')
# df_weights_sharp = pd.DataFrame(pf_proposed_sortino14_17, index = pf_stocks_sharp)

# Display every figure that is currently open.
plt.show()
Exemplo n.º 7
0
def _plot_elbow_curve(data_ret_vol, max_n_clusters):
    """Plot the K-Means inertia for every k in 2..max_n_clusters (inclusive)."""
    candidate_ks = range(2, max_n_clusters + 1)
    distortions = []
    for k in candidate_ks:
        k_means = KMeans(n_clusters=k)
        k_means.fit(X=data_ret_vol)
        distortions.append(k_means.inertia_)

    plt.plot(
        candidate_ks,
        distortions,
        linestyle='-',
        color='red',
        lw=2,
        label='Elbow curve',
    )
    plt.title('Elbow curve')
    plt.xlabel('Number of clusters')
    plt.ylabel('Distortion')
    plt.grid(True)
    plt.legend()


def _plot_cluster_map(km, data_ret_vol, mesh_step=.002):
    """Draw the fitted model's regions, centroids and stocks on a new figure."""
    # Mesh covering [x_min, x_max] x [y_min, y_max] with a 0.1 margin.
    # A smaller ``mesh_step`` gives a finer picture of the cluster regions.
    x_min, x_max = data_ret_vol[:, 0].min() - 0.1, data_ret_vol[:, 0].max() + 0.1
    y_min, y_max = data_ret_vol[:, 1].min() - 0.1, data_ret_vol[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, mesh_step),
                         np.arange(y_min, y_max, mesh_step))

    # Cluster label of every mesh point, reshaped back to the mesh layout.
    mesh_labels = km.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    plt.figure(figsize=(10, 6))
    plt.imshow(mesh_labels, interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap=plt.cm.Paired,
               aspect='auto', origin='lower')

    # Centroids are drawn as white stars on top of everything else.
    centroids = km.cluster_centers_
    plt.scatter(centroids[:, 0], centroids[:, 1],
                marker='*', s=420,
                color='white', zorder=10)
    # One marker per stock.
    plt.plot(data_ret_vol[:, 0],
             data_ret_vol[:, 1],
             'o',
             markersize=12)

    plt.title('K-means clustering\n'
              'Centroids are marked with white star')
    plt.xlabel('Returns')
    plt.ylabel('Volatility')


def _plot_cluster_returns(pf_daily_returns, clusters, verbose):
    """Plot, per cluster, each member's cumulative daily return and their mean."""
    for n in sorted(clusters):
        members = clusters[n]
        plt.figure(figsize=(10, 6))

        # Individual stocks as thin grey lines.
        for stock in members:
            plt.plot(pf_daily_returns[stock].cumsum(), 'gray', linewidth=1)

        plt.title(f'Cluster #{n}')
        plt.ylabel("Daily returns cumulative sum")
        # The cluster average shows the dynamic of the whole group.
        cluster_avg = pf_daily_returns[members].mean(axis=1)
        plt.plot(cluster_avg.cumsum(), 'red', linewidth=3)
        plt.xticks(rotation=30)
        plt.grid(True)

        if verbose:
            print(f'Cluster #{n}')
            print(members)


def cluster_stocks(data: pd.DataFrame, n_clusters=5, verbose=False):
    """ Clusters (KMeans) stocks based on their mean returns and volatility.
        The decision about the optimal number of clusters can be made based
        on an elbow curve, which is drawn for 2 up to at most 20 clusters
        (limited by the number of stocks).
        Good article about elbow curve:
        https://blog.cambridgespark.com/how-to-determine-the-optimal-number-of-clusters-for-k-means-clustering-14f27070048f
        The function creates following plots:
         1. Elbow curve to make decision about optimal number of clusters
         2. A plot with K-Means clustered by return and volatility stocks and centroids.
         3. Plots with clusters and their daily return cumulative sum over the given period
        :Input:
             : data: ``pandas.DataFrame`` stock prices
             :n_clusters: ``int`` (default: 5), should be >= 2 and not more
             than the number of stocks in portfolio (which must be > 2)
             :verbose: ``boolean`` (default= ``False``), whether to print out clusters
        :Output:
             :clusters: ``dict`` mapping each non-empty cluster index to the
             ``list`` of (Stocks) tickers assigned to it.
        :Raises:
             :ValueError: if ``n_clusters`` is not an integer, is < 2 or
             exceeds the number of stocks, or if there are fewer than 3 stocks.
    """
    if not isinstance(n_clusters, int):
        raise ValueError('Total number of clusters must be integer.')
    if n_clusters < 2:
        # Report the offending value and the bound that is actually enforced.
        raise ValueError(f'Total number of clusters({n_clusters}) must be >= 2.')

    # Clean first so the counts below refer to the frame that is clustered.
    # NOTE(review): presumably clean_data leaves one column per stock -- confirm.
    if isinstance(data.columns, pd.MultiIndex):
        data = clean_data(data)

    n_stocks = len(data.columns)
    if n_stocks < 3:
        raise ValueError(f'Total number of stocks in portfolio({n_stocks}) must be > 2.')
    if n_clusters > n_stocks:
        raise ValueError(f'Total number of clusters({n_clusters}) '
                         f'must be <= number of stocks({n_stocks}) in portfolio')

    pf_return_means = mean_returns(data, type='log')
    pf_daily_returns = daily_log_returns(data)
    pf_volatility = volatility(data)
    # One row per stock, columns (mean return, volatility), as K-Means expects.
    data_ret_vol = np.asarray([np.asarray(pf_return_means), np.asarray(pf_volatility)]).T

    _plot_elbow_curve(data_ret_vol, min(20, n_stocks))

    km = KMeans(n_clusters=n_clusters)
    km.fit(data_ret_vol)
    _plot_cluster_map(km, data_ret_vol)

    # Assign every stock to its nearest centroid.
    idx, _ = vq(data_ret_vol, km.cluster_centers_)
    clusters = {i: [] for i in sorted(set(idx))}
    for name, cluster in zip(pf_return_means.index, idx):
        clusters[cluster].append(name)

    # Only clusters that received a stock are plotted; looping over
    # range(n_clusters) raised KeyError when a centroid stayed empty.
    _plot_cluster_returns(pf_daily_returns, clusters, verbose)

    return clusters
Exemplo n.º 8
0
def mc_random_portfolios(data: pd.DataFrame,
                         risk_free_rate=0.01,
                         num_portfolios=10000,
                         freq=252):
    """Simulate randomly weighted portfolios and collect their metrics.

    For each of ``num_portfolios`` iterations a weight vector is drawn with
    ``random_weights_exp`` and the portfolio return, volatility, downside
    volatility, Sharpe ratio and Sortino ratio are computed from it.

    :Input:
         :data: ``pandas.DataFrame`` with one column per asset
         :risk_free_rate: ``float`` (default: 0.01)
         :num_portfolios: ``int`` (default: 10000), number of random draws
         :freq: ``int`` (default: 252) forwarded to the return and
             volatility helpers
    :Output:
         :portfolios: ``pandas.DataFrame`` with one row per simulated
             portfolio: the columns 'Returns', 'Volatility',
             'Down. Volatility', 'Sharp Ratio', 'Sortino Ratio', followed by
             one weight column per column of ``data``
    """
    cvm = cov_matrix(data)
    stocks_returns = mean_returns(data, freq=freq, type="log")
    # Pass ``freq`` here as well so the downside volatility uses the same
    # frequency as the returns and the portfolio volatility.
    stocks_negative_volatility = negative_volatility(data, freq=freq)

    num_assets = len(data.columns)

    pf_ret = []  # portfolio returns
    pf_vol = []  # portfolio volatilities
    pf_down_vol = []  # portfolio downside volatilities
    pf_weights = []  # asset weight vectors
    pf_sharp_ratio = []  # Sharpe ratios
    pf_sortino_ratio = []  # Sortino ratios

    # The context manager closes the bar even if an iteration raises, and
    # one tick per portfolio keeps the counter exact for any
    # ``num_portfolios`` (the old 1000-step update overshot small totals).
    with tqdm(total=num_portfolios) as pbar:
        for _ in range(num_portfolios):
            weights = random_weights_exp(num_assets)
            pf_weights.append(weights)

            returns = pf_mean_returns(weights, stocks_returns)
            pf_ret.append(returns)

            portfolio_vol = pf_volatility(weights, cvm, freq=freq)
            pf_vol.append(portfolio_vol)

            pf_sharp_ratio.append((returns - risk_free_rate) / portfolio_vol)

            portfolio_down_vol = pf_negative_volatility(
                weights=weights,
                stocks_yearly_downside_vol=stocks_negative_volatility)
            pf_down_vol.append(portfolio_down_vol)

            pf_sortino_ratio.append(
                (returns - risk_free_rate) / portfolio_down_vol)

            pbar.update(1)

    df_rv = {
        "Returns": pf_ret,
        "Volatility": pf_vol,
        "Down. Volatility": pf_down_vol,
        "Sharp Ratio": pf_sharp_ratio,
        "Sortino Ratio": pf_sortino_ratio,
    }

    # One weight column per asset, named after the asset's column in ``data``.
    for counter, symbol in enumerate(data.columns):
        df_rv[symbol] = [w[counter] for w in pf_weights]

    return pd.DataFrame(df_rv)
Exemplo n.º 9
0
    def __init__(self,
                 data: pd.DataFrame,
                 weights=None,
                 risk_free_rate=0.0425,
                 freq=252):
        """Validate the inputs and precompute the portfolio statistics.

        :Input:
             :data: ``pandas.DataFrame``; frames with ``MultiIndex`` columns
                 are passed through ``clean_data`` first
             :weights: ``numpy.ndarray`` with one weight per column of the
                 stored data, a single-column ``pandas.DataFrame`` indexed by
                 those column labels, or ``None`` (default) for equal weights
             :risk_free_rate: ``float`` or ``int`` (default: 0.0425)
             :freq: positive ``int`` (default: 252)
        :Raises:
             :ValueError: if ``freq`` is not a positive integer,
                 ``risk_free_rate`` is not numeric, ``data`` is not a
                 DataFrame, ``weights`` has an unsupported type or more than
                 one column, or the number of weights differs from the
                 number of columns
        """
        if not isinstance(freq, int):
            raise ValueError("Frequency must be an integer")
        elif freq <= 0:
            raise ValueError("Freq must be > 0")
        else:
            self._freq = freq

        if not isinstance(risk_free_rate, (float, int)):
            raise ValueError("Risk free rate must be a float or an integer")
        else:
            self._risk_free_rate = risk_free_rate

        if not isinstance(data, pd.DataFrame):
            raise ValueError("data should be a pandas.DataFrame")

        if isinstance(data.columns, pd.MultiIndex):
            self._data = clean_data(data)
        else:
            self._data = data

        # Monte Carlo results start empty; nothing in this method fills them.
        self._mc_portfolios = None
        self._mc_min_vol_port = None
        self._mc_min_downside_vol_port = None
        self._mc_max_sharpe_port = None
        self._mc_max_sortino_port = None
        self._mc_simulations_results = None
        self._weights = None
        #####################
        columns = self._data.columns
        n_assets = len(columns)

        if weights is None:
            # Equal weighting across all assets.
            self._weights = np.array([1.0 / n_assets for _ in range(n_assets)])
        elif isinstance(weights, pd.DataFrame):
            if len(weights.columns) > 1:
                raise ValueError(
                    "Incorrect dataframe with weights provided. Expected 1 column with weights"
                )
            # Order the weights by the columns of the stored data, not of the
            # raw ``data`` argument: after ``clean_data`` the two can differ.
            weight_column = weights.columns[0]
            self._weights = np.array(
                [weights.at[column, weight_column] for column in columns])
        elif isinstance(weights, np.ndarray):
            self._weights = weights
        else:
            raise ValueError(
                "Weights should be numpy ndarray or pd.DataFrame")

        if len(self._weights) != n_assets:
            raise ValueError("Incorrect data or weights were provided")

        self._cvm = cov_matrix(self._data)
        self._mr = mean_returns(self._data, freq=self._freq)
        self._negative_vol = negative_volatility(data=self._data,
                                                 freq=self._freq)

        # Snapshot of the headline metrics, read through the accessors of the
        # same names (defined outside this method).
        self._df_perfomamce = {}
        self._df_perfomamce["Returns"] = self.returns
        self._df_perfomamce["Volatility"] = self.volatility
        self._df_perfomamce["Down. Volatility"] = self.negative_volatility
        self._df_perfomamce["Sharp Ratio"] = self.sharp
        self._df_perfomamce["Sortino Ratio"] = self.sortino