Exemplo n.º 1
0
    def GetOptimalPortfolio(self,excludedStocks,startDate='2009-02-04'):
        """Compute long-only portfolio weights at every rebalancing step.

        For each rebalancing step ``k`` the method:

        1. asks ``self.GetExpectedReturns`` (through a worker pool) for the
           expected return of every stock;
        2. estimates the covariance matrix from rows
           ``k*freq : k*freq + self.window`` of the cleaned return matrix;
        3. minimises ``self.sharpe`` with SLSQP (per the original comment
           this is the inverse of the Sharpe ratio) subject to
           ``sum(weights) == 1`` and ``0 <= weight <= 1``;
        4. applies the weights to the price rows that follow the window and
           rescales the segment so that it starts where the previous segment
           ended (the first segment starts at 1).

        Side effect: sets ``self.rebalanceFreq`` to
        ``min(number of return rows, self.rebalanceFrequency)``.

        Args:
            excludedStocks: container of ``BBGTicker`` values to leave out of
                the universe (e.g. tickers with missing data).
            startDate: earliest date used for the machine-learning inputs and
                outputs, in the same string format as the keys of the stored
                ``Close`` prices.  It is passed through
                ``DataHandler.HandleIncorrectDate`` before being looked up.
                Defaults to ``'2009-02-04'``, the value that used to be
                hard-coded.

        Returns:
            A tuple ``(htot, portValue, mutot, stocks)`` where ``htot`` is the
            list of weight vectors (one per rebalancing step), ``portValue``
            is the output of ``self.GetDataFrameFromList`` applied to the
            chained value segments, ``mutot`` is the list of expected-return
            vectors and ``stocks`` is the list of tickers in the order used
            for the expected-return vectors.

        Raises:
            ValueError: if the worker pool returned no expected return for
                one of the stocks.
        """
        # Universe: every ticker stored in the database minus the excluded ones
        stocks = [x['BBGTicker'] for x in self.db.Stocks.find({})
                  if x['BBGTicker'] not in excludedStocks]

        Xtot = {}
        Ytot = {}
        dh = DataHandler

        # Collect the machine-learning features and targets of each stock
        for s in stocks:
            # The price dates define the calendar of the stock
            dataMongo = self.db.Prices.find_one({'BBGTicker' : s},{'Close' : 1})
            dates = sorted(dataMongo['Close'].keys())

            # Keep only the history from startDate onwards. The last
            # 1 + self.horizon dates are dropped as well (presumably because
            # their forward return is not observable yet - confirm).
            filterDate = dh.HandleIncorrectDate(startDate,'',dates)
            indexDate = dates.index(filterDate)
            dates = dates[indexDate:len(dates)-1-self.horizon]

            pr = Predictor(s,self.endDate,self.horizon,self.db)
            Xtot[s] = {"ANALYTICS": pr.GetMLInputs(dates) , "DATES" : dates}
            Ytot[s] = {"RETURNS": pr.GetMLOutputs(dates) , "DATES" : dates}

        # Prices (one column per stock) and the matching cleaned return matrix
        df = self.GetPricesDataFrame(stocks)
        ret = self.GetCleanReturns(df)

        n = ret.shape[0] # number of return rows
        m = ret.shape[1] # number of stocks

        # Number of rebalancing steps that fit in the history once the first
        # estimation window has been set aside
        self.rebalanceFreq = min(n,self.rebalanceFrequency)
        freq = self.rebalanceFreq
        rebalanceTotal = math.floor((n - n%freq-self.window)/freq)

        # Optimisation set-up is identical for every step, so build it once:
        # weights sum to one and each weight lies in [0, 1]
        bnds = ((0,1),)*m
        cons = {'type': 'eq', 'fun': lambda x: numpy.sum(x)-1.0}

        portValue = []
        htot = []
        mutot = []

        for k in range(rebalanceTotal+1):
            # Expected return of every stock, computed in parallel
            func = partial(self.GetExpectedReturns,Xtot,Ytot,k)
            pool = Pool(len(stocks))
            try:
                results = pool.map(func, stocks)
            finally:
                # Always release the workers, even when a task raised
                pool.close()
                pool.join()

            # Index the results by ticker (first occurrence wins) so that each
            # expected return lands in the slot of its own stock
            expected = {}
            for res in results:
                expected.setdefault(res[0], res[1])
            missing = [s for s in stocks if s not in expected]
            if missing:
                raise ValueError("No expected return computed for: "
                                 + ", ".join(str(s) for s in missing))
            mu = numpy.empty(m)
            for i, s in enumerate(stocks):
                mu[i] = expected[s]

            # Covariance of the returns over the estimation window
            start = k*freq
            V = numpy.cov(ret[start:start+self.window].T)

            mutot.append(mu)

            # NOTE(review): numpy.ones(m) does not satisfy the sum-to-one
            # constraint when m > 1; it is kept as the starting point so that
            # the optimiser results are unchanged.
            h0 = numpy.ones(m)

            # 0.001 is forwarded to self.sharpe unchanged (presumably the
            # risk-free rate - confirm against self.sharpe)
            res = minimize(self.sharpe, h0, args=(V,mu,0.001,)
                           ,method='SLSQP',constraints=cons,bounds=bnds)
            h = res.x
            htot.append(h)

            # Value of the portfolio over the period that follows the window.
            # NOTE(review): the final period stops one row before the end of
            # df, as in the original code - confirm this is intended.
            first = start + self.window
            if k == rebalanceTotal:
                temp = df[first:df.shape[0]-1].dot(h)
            else:
                temp = df[first:first+freq].dot(h)

            # The final period is empty when the rebalancing grid ends exactly
            # on the last usable price row: the weights are still returned but
            # there is nothing to value.
            if len(temp) == 0:
                continue

            # Positional access (.iloc) because the series is indexed by the
            # labels of df, not by 0..len-1
            temp = temp/temp.iloc[0]
            if portValue:
                # Reinvest: start from the last value of the previous period
                temp = temp * portValue[-1].iloc[-1]
            portValue.append(temp)

        portValue = self.GetDataFrameFromList(portValue) # dataframe instead of list

        return htot,portValue,mutot,stocks