/
Portfolios.py
328 lines (254 loc) · 13.5 KB
/
Portfolios.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 15 14:00:23 2016
@author: Hugo Fallourd, Dakota Wixom, Yun Chen, Sanket Sojitra, Sanjana Cheerla, Wanting Mao, Chay Pimmanrojnagool, Teng Fei
This file implements a Portfolios class to calculate the Optimal tangent portfolio,
tangent to the efficient frontier or maximal sharpe ratio. The class also
calculates the Black Litterman portfolio
"""
import numpy
import math
import quandl
from scipy.optimize import minimize
import pandas
from BlackLitterman import bl_omega
from BlackLitterman import altblacklitterman
from Predictor import Predictor
from DataHandler import DataHandler
from functools import partial
from multiprocessing.dummy import Pool
class Portfolios(object):
"""Constructor"""
def __init__(self,sDate,eDate,window,rebalance,h,db):
self.startDate = sDate
self.endDate = eDate
self.window=window
self.rebalanceFrequency=rebalance
self.horizon = h
self.db=db
"""Return Benchmark value from Quandl"""
def GetBenchmarkPortfolio(self,benchmark,apiKey):
quandl.ApiConfig.api_key = apiKey
data = quandl.get(benchmark,start_date=self.startDate, end_date=self.endDate)
benchmarkData = data[['Index Value']]
benchmarkData = benchmarkData.dropna() #Remove NaN data
return benchmarkData
"""Calculate tangent portfolio (max sharpe ratio) using machine learning for expected returns"""
def GetOptimalPortfolio(self,excludedStocks):
#Get list of all stocks excluding some stocks because missing data
stocks = [x['BBGTicker'] for x in list(self.db.Stocks.find({}))]
stocks = [x for x in stocks if x not in excludedStocks]
Xtot ={}
Ytot ={}
dh = DataHandler
#Download machine learning features technical indicators for each stock and insert in dictionary
for s in stocks:
#Get the list of dates using prices
dataMongo = self.db.Prices.find_one({'BBGTicker' : s},{'Close' : 1})
data=dataMongo['Close']
dates = sorted(data.keys())
#Use data only from 2009-02-04, earlier data are irrelevant
filterDate='2009-02-04'
filterDate = dh.HandleIncorrectDate(filterDate,'',dates)
indexDate = dates.index(filterDate)
dates = dates[indexDate:len(dates)-1-self.horizon]
#For each stock insert features into dictionary Xtot and output future return into dictionary Ytot
pr = Predictor(s,self.endDate,self.horizon,self.db)
Xtot[s]= {"ANALYTICS": pr.GetMLInputs(dates) , "DATES" : dates}
Ytot[s]= {"RETURNS": pr.GetMLOutputs(dates) , "DATES" : dates}
#Get a dataframe with date as index and prices for each stock (columns)
df = self.GetPricesDataFrame(stocks)
#Get matrix (array) of return, removing NaN numbers
ret = self.GetCleanReturns(df)
n=ret.shape[0] #Number of rows, or number of return for each stock
m=ret.shape[1] #number of stocks
#Given number of dates and frequency (every h days), get the number of time we want to rebalance
self.rebalanceFreq=min(n,self.rebalanceFrequency)
rebalanceTotal = math.floor((n - n%self.rebalanceFreq-self.window)/self.rebalanceFreq)
portValue=[]
htot=[]
mutot=[]
#for eachbtime portfolio is rebalanced calculate optimal weights
for k in range(rebalanceTotal+1):
st=0
mu = numpy.empty(m) #expected return vector for each stock
#Define partial function to use multi thread pool
func = partial(self.GetExpectedReturns,Xtot,Ytot,k)
pool = Pool(len(stocks))
#Calculate expected returns using parallel programming multithreading
results = pool.map(func, stocks)
#close the pool and wait for the work to finish
pool.close()
pool.join()
#Ensure we map the expected return to the correct stock and store in numpy array mu
for s in stocks:
for res in results:
if res[0]==s:
mu[st] = res[1]
st=st+1
break
#Calculate covariance matrix
V = numpy.cov(ret[k*self.rebalanceFreq:k*self.rebalanceFreq+self.window].T)
#Insert expected return vector for each rebalancing
mutot.append(mu)
#Set optimization constraints (sum of weights = 1) and boundaries (0 <= weight <= 1)
bnds = ((0,1),)*m
cons = ({'type': 'eq', 'fun': lambda x: numpy.sum(x)-1.0})
h0 = numpy.ones(m)
#Minimize the inverse of sharpe ratio to get optimal weights h
res= minimize(self.sharpe, h0, args=(V,mu,0.001,)
,method='SLSQP',constraints=cons,bounds=bnds)
h=res.x
htot.append(h) #append weights for each rebalancing
#Apply optimal weights to dataframe price to get the portfolio values for each date
#Handle the reinvestment when the portfolio is rebalanced
if (k==rebalanceTotal):
temp = df[k*self.rebalanceFreq+self.window:df.shape[0]-1].dot(h)
else:
temp = df[k*self.rebalanceFreq + self.window:(k+1)*self.rebalanceFreq + self.window].dot(h)
if k==0:
temp = temp/temp[0]
elif k>0:
temp = temp/temp[0] * portValue[k-1][portValue[k-1].size-1]
portValue.append(temp)
portValue = self.GetDataFrameFromList(portValue) #Get a dataframe instead of list
return htot,portValue,mutot,stocks
"""Return stock expected return using this function in order to use multithreading"""
def GetExpectedReturns(self,Xtot,Ytot,k,stock):
#Retrieve the dates for each adjustment close price or close price
dataMongo = self.db.Prices.find_one({'BBGTicker' : stock},{'Adj Close' : 1})
if 'Adj Close' not in list(dataMongo.keys()):
dataMongo = self.db.Prices.find_one({'BBGTicker' : stock},{'Close' : 1})
data=dataMongo['Close']
else:
data=dataMongo['Adj Close']
dates = sorted(data.keys())
#Handle incorrect start and end date entered by user
dh = DataHandler
self.startDate = dh.HandleIncorrectDate(self.startDate,'',dates)
self.endDate = dh.HandleIncorrectDate('',self.endDate,dates)
dates = dates[dates.index(self.startDate):dates.index(self.endDate)]
#Prediction must consider the window for covariance matrix
startDate = dates[k*self.rebalanceFreq + self.window]
pr = Predictor(stock,startDate,self.horizon,self.db)
#Lookup output and features and
TIsDates = Xtot[stock]["DATES"]
Xtrain = Xtot[stock]["ANALYTICS"][:TIsDates.index(startDate)]
Ytrain = Ytot[stock]["RETURNS"][:TIsDates.index(startDate)]
Xpred = Xtot[stock]["ANALYTICS"][TIsDates.index(startDate)]
mu = [stock,pr.PredictKNN(Xtrain,Ytrain,Xpred)/self.horizon]
return mu
"""Return the Black Litterman portfolio with view applied to the machine learning tangent portfolio"""
def GetBLPortfolio(self,stockView,stockViewReturn,stockConfidence,tau,delta,excludedStocks):
#Get list of all stocks excluding some stocks because missing data
stocks = [x['BBGTicker'] for x in list(self.db.Stocks.find({}))]
stocks = [x for x in stocks if x not in excludedStocks]
#Get a dataframe with date as index and prices for each stock (columns)
df = self.GetPricesDataFrame(stocks)
#Get matrix (array) of return, removing NaN numbers
ret = self.GetCleanReturns(df)
n=ret.shape[0] #Number of rows, or number of return for each stock
m=ret.shape[1] #number of stocks
v=len(stockView) #Number of stocks in the view
P = numpy.zeros((v,m)) # v×m matrix of the asset weights within each view
Q = numpy.zeros((v,1)) # v×1 vector of the returns for each view
indexh=[]
#Populate view matrix P and Q given user inputs stockView and stockViewReturn
for item in stockView:
i = stockView.index(item)
j = stocks.index(item)
P[i][j] = 1
Q[i] = stockViewReturn[i] #+mu[j]
indexh.append(j)
portBLValue=[]
self.rebalanceFrequency=n+1 #Only 1 rebalancing
#Get tangent portfolio weights
h,unused,unused,unused = self.GetOptimalPortfolio(excludedStocks)
#Calculate covariance matrix
V = numpy.cov(ret[:self.window].T)
# Coefficient of uncertainty in the prior estimate of the mean
# from footnote (8) on page 11
tauV = tau * V
# k×k matrix of the covariance of the views. Ω is diagonal as the views are required to be
#independent and uncorrelated
Omega = numpy.zeros((v,v))
for c in range(v):
Omega[c][c] = bl_omega(stockConfidence[c], P[c], tauV)
#Call Black Litterman function from .org website
er, hBL, lmbda = altblacklitterman(delta, h[0], V, tau, P, Q, Omega)
htemp = numpy.reshape(hBL,(1,len(stocks)))
hBL = htemp[0]
#Adjust weights such that the sum of weights = 1
hBL = self.AdjustBLWeights(h[0],hBL,indexh)
tempBL = df[self.window:].dot(hBL)
portBLValue= [tempBL/tempBL[0]] #Adjust portfolio value
portBLValue = self.GetDataFrameFromList(portBLValue) #Get a dataframe instead of list
return hBL,portBLValue
"""Adjust BL weights such that they sum to 1"""
def AdjustBLWeights(self,h,hBL,indexh):
summ=0
summ2=0
for ind in indexh:
summ+= hBL[ind]
summ2+= h[ind]
for ind in indexh:
hBL[ind] = hBL[ind] * summ2/summ
return hBL
"""Return a merged dataframe for the list of stock 'stocks' """
def GetPricesDataFrame(self,stocks):
#Initial dummy dataframe
df = pandas.DataFrame([''],index=['1900-01-01'])
dfHeaders = ['Dummy']
#For each stock retrieve dataframe then merge
for stock in stocks:
dfHeaders.append(stock+' Close') #Stock close price header
#If Adjusted close price is available then use it, else use Close price
dataMongo = self.db.Prices.find_one({'BBGTicker' : stock},{'Adj Close' : 1})
if 'Adj Close' not in list(dataMongo.keys()):
dataMongo = self.db.Prices.find_one({'BBGTicker' : stock},{'Close' : 1})
data=dataMongo['Close']
else:
data=dataMongo['Adj Close']
dates = sorted(data.keys()) #Get sorted list of dates for each price
#Handle incorrect date entered by users
dh = DataHandler
self.startDate = dh.HandleIncorrectDate(self.startDate,'',dates)
self.endDate = dh.HandleIncorrectDate('',self.endDate,dates)
#Filter date set
dates = dates[dates.index(self.startDate):dates.index(self.endDate)]
#Build list of prices
close=[]
for dt in dates:
close.append(data[dt])
s = [stock+' Close']
s.append(close)
s.append(dates)
#Transform list to dataframe and concatenate
tempdf = pandas.DataFrame(s[1],index=s[2])
df = pandas.concat([df, tempdf], axis=1,join='outer')
df.columns = dfHeaders #set dataframe headers
df = df.drop('Dummy',1) #Drop initial dummy column
df = df.sort_index() #Sort dataframe by date
df = df.dropna(axis=0, how='any') #Drop line with NaN prices
return df
"""Given a dataframe returns a multi dimensional array of clean returns"""
def GetCleanReturns(self,df):
ret = df.as_matrix() #Define matrix
ret = ret[:len(ret)-1,:]
ret = numpy.diff(ret.astype(float), axis=0)/ret[:-1].astype(float) #Calculate daily returns
ret = ret[~numpy.isnan(ret).any(axis=1)] #Remove NaN returns
ret = ret[numpy.all(numpy.abs(ret) < 1, axis=1)] #Remove outlier
return ret
"""Given a list of list of portfolio value return a dataframe"""
def GetDataFrameFromList(self,portValue):
#For each list of portfolio value in list transform to dataframe
temp = portValue[0]
for p in range(len(portValue)-1):
temp = pandas.concat([pandas.DataFrame(temp),pandas.DataFrame(portValue[p+1])],axis=0)
portValue = pandas.DataFrame(temp)
portValue.index = pandas.to_datetime(portValue.index)
return portValue
"""Inverse of shrpe ratio function of covariance matrix Sig, """
"""portfolio weights w, vector of expected returns Er, and risk free rate rf"""
def sharpe(self,w,Sig,Er,rf):
return numpy.dot(w,numpy.dot(Sig,w))/(numpy.dot(Er,w) - rf)