def CalculatePriceCorrelation(tickerList: list): datafileName = 'data/_priceCorrelations.csv' summaryfileName = 'data/_priceCorrelationTop10.txt' result = None startDate = str(AddDays(GetTodaysDate(), -365)) endDate = str(GetTodaysDate()) for ticker in tickerList: prices = PricingData(ticker) print('Loading ' + ticker) if prices.LoadHistory(requestedEndDate=GetTodaysDate()): prices.TrimToDateRange(startDate, endDate) prices.NormalizePrices() x = prices.GetPriceHistory(['Average']) x.rename(index=str, columns={"Average": ticker}, inplace=True) if result is None: result = x else: result = result.join(x, how='outer') result = result.corr() result.to_csv(datafileName) f = open(summaryfileName, 'w') for ticker in tickerList: topTen = result.nsmallest(10, ticker) print(topTen[ticker]) f.write(ticker + '\n') f.write(topTen[ticker].to_string(header=True, index=True) + '\n') f.write('\n') f.close() print( 'Intended to create stability, in practice, this is a great way to pair well performing stocks with poor performing or volatile stocks.' )
def CalculatePriceCorrelation(tickerList: list): datafileName = 'data/_priceCorrelations.csv' summaryfileName = 'data/_priceCorrelationTop10.txt' result = pandas.DataFrame() startDate = str(datetime.datetime.now().date() + datetime.timedelta(days=-365)) endDate = str(datetime.datetime.now().date()) for ticker in tickerList: prices = PricingData(ticker) print('Loading ' + ticker) if prices.LoadHistory(True): prices.TrimToDateRange(startDate, endDate) prices.NormalizePrices() result[ticker] = prices.GetPriceHistory(['Average']) result = result.corr() result.to_csv(datafileName) f = open(summaryfileName, 'w') for ticker in tickerList: topTen = result.nsmallest(10, ticker) print(topTen[ticker]) f.write(ticker + '\n') f.write(topTen[ticker].to_string(header=True, index=True) + '\n') f.write('\n') f.close()
def CalculatePriceCorrelation(tickerList: list): datafileName = 'data/_priceCorrelations.csv' summaryfileName = 'data/_priceCorrelationTop10.txt' result = None startDate = str(datetime.datetime.now().date() + datetime.timedelta(days=-365)) endDate = str(datetime.datetime.now().date()) for ticker in tickerList: prices = PricingData(ticker) print('Loading ' + ticker) if prices.LoadHistory(): prices.TrimToDateRange(startDate, endDate) prices.NormalizePrices() x = prices.GetPriceHistory(['Average']) x.rename(index=str, columns={"Average": ticker}, inplace=True) if result is None: result = x else: result = result.join(x, how='outer') result = result.corr() result.to_csv(datafileName) f = open(summaryfileName, 'w') for ticker in tickerList: topTen = result.nsmallest(10, ticker) print(topTen[ticker]) f.write(ticker + '\n') f.write(topTen[ticker].to_string(header=True, index=True) + '\n') f.write('\n') f.close()
def RunPredictions(ticker: str = '^SPX', numberOfLearningPasses: int = 750): #Runs three prediction models (Linear, LSTM, CCN) predicting a target price 4, 20, and 60 days in the future. prices = PricingData(ticker) print('Loading ' + ticker) if prices.LoadHistory(): prices.TrimToDateRange('1/1/1950', '3/1/2018') prices.NormalizePrices() for ii in [4, 20, 60]: for i in range(0, 3): PredictPrices(prices, i, ii, numberOfLearningPasses)
def RunPredictions(ticker: str = '^SPX', numberOfLearningPasses: int = 750): prices = PricingData(ticker) CreateFolder(dataFolder) print('Loading ' + ticker) if prices.LoadHistory(): prices.TrimToDateRange('1/1/1950', '3/1/2018') prices.NormalizePrices() for ii in [4, 20, 60]: for i in range(0, 3): PredictPrices(prices, i, ii, numberOfLearningPasses)
def TestPredictionModels(ticker: str = '^SPX', numberOfLearningPasses: int = 300): #Simple procedure to test different prediction methods 4,20,60 days in the future plot = PlotHelper() prices = PricingData(ticker) if prices.LoadHistory(): prices.TrimToDateRange('1/1/2000', '3/1/2018') print('Loading ' + ticker) for daysForward in [4, 20, 60]: for predictionMethod in range(0, 5): modelDescription = ticker + '_method' + str( predictionMethod) + '_epochs' + str( numberOfLearningPasses) + '_daysforward' + str( daysForward) print('Predicting ' + str(daysForward) + ' days using method ' + modelDescription) prices.PredictPrices(predictionMethod, daysForward, numberOfLearningPasses) predDF = prices.pricePredictions.copy() predDF = predDF.join(prices.GetPriceHistory()) predDF['PercentageDeviation'] = abs( (predDF['Average'] - predDF['estAverage']) / predDF['Average']) averageDeviation = predDF['PercentageDeviation'].tail( round(predDF.shape[0] / 4)).mean( ) #Average of the last 25% to account for training. print('Average deviation: ', averageDeviation * 100, '%') predDF.to_csv(dataFolder + modelDescription + '.csv') plot.PlotDataFrame(predDF[['estAverage', 'Average']], modelDescription, 'Date', 'Price', True, dataFolder + modelDescription) plot.PlotDataFrameDateRange( predDF[['Average', 'estAverage']], None, 160, modelDescription + '_last160ays', 'Date', 'Price', dataFolder + modelDescription + '_last160Days') plot.PlotDataFrameDateRange( predDF[['Average', 'estAverage']], None, 500, modelDescription + '_last500Days', 'Date', 'Price', dataFolder + modelDescription + '_last500Days')
def TrainTickerRaw(ticker: str = '^SPX', UseLSTM: bool = True, prediction_target_days: int = 5, epochs: int = 500, usePercentages: bool = False, hidden_layer_size: int = 512, dropout: bool = True, dropout_rate: float = 0.01, learning_rate: float = 2e-5): plot = PlotHelper() prices = PricingData(ticker) print('Loading ' + ticker) if prices.LoadHistory(True): prices.TrimToDateRange('1/1/2000', '3/1/2018') if usePercentages: prices.ConvertToPercentages( ) #Percentages don't work well I suspect because small errors have a huge impact when you revert back to the original prices and they roll forward else: prices.NormalizePrices() prices.CalculateStats() model = StockPredictionNN(baseModelName=ticker, UseLSTM=UseLSTM) if UseLSTM: window_size = 1 modelDescription = ticker + '_LSTM' modelDescription += '_epochs' + str(epochs) + '_histwin' + str( window_size) + '_daysforward' + str(prediction_target_days) if usePercentages: modelDescription += '_percentages' FieldList = ['Average'] model.LoadSource(sourceDF=prices.GetPriceHistory(), FieldList=FieldList, window_size=window_size) model.LoadTarget(targetDF=None, prediction_target_days=prediction_target_days) model.MakeBatches(batch_size=128, train_test_split=.93) model.BuildModel(layer_count=1, hidden_layer_size=hidden_layer_size, dropout=dropout, dropout_rate=dropout_rate, learning_rate=learning_rate) model.DisplayModel() model.Train(epochs=epochs) model.Predict(True) model.Save() #model.DisplayDataSample() else: #CNN window_size = 16 * prediction_target_days modelDescription = ticker + '_CNN' modelDescription += '_epochs' + str(epochs) + '_histwin' + str( window_size) + '_daysforward' + str(prediction_target_days) if usePercentages: modelDescription += '_percentages' #FieldList = None FieldList = ['High', 'Low', 'Open', 'Close'] model.LoadSource(sourceDF=prices.GetPriceHistory(), FieldList=FieldList, window_size=window_size) model.LoadTarget(targetDF=None, prediction_target_days=prediction_target_days) model.MakeBatches(batch_size=64, train_test_split=.93) model.BuildModel(layer_count=1, hidden_layer_size=hidden_layer_size, dropout=dropout, dropout_rate=dropout_rate, learning_rate=learning_rate) model.DisplayModel() model.Train(epochs=epochs) model.Predict(True) model.Save() if usePercentages: predDF = model.GetTrainingResults(True, True) predDF = predDF.loc[:, ['Average', 'Average_Predicted']] print('Unraveling percentages..') predDF['Average_Predicted'].fillna(0, inplace=True) predDF.iloc[0] = prices.CTPFactor['Average'] for i in range(1, predDF.shape[0]): predDF.iloc[i] = (1 + predDF.iloc[i]) * predDF.iloc[i - 1] print(predDF) predDF['PercentageDeviation'] = abs( (predDF['Average'] - predDF['Average_Predicted']) / predDF['Average']) predDF.to_csv(dataFolder + modelDescription + '.csv') plot.PlotDataFrame(predDF[['Average', 'Average_Predicted']], modelDescription, 'Date', 'Price', True, dataFolder + modelDescription) plot.PlotDataFrameDateRange( predDF[['Average', 'Average_Predicted']], None, 160, modelDescription + '_last160ays', 'Date', 'Price', dataFolder + modelDescription + '_last160Days') plot.PlotDataFrameDateRange( predDF[['Average', 'Average_Predicted']], None, 500, modelDescription + '_last500Days', 'Date', 'Price', dataFolder + modelDescription + '_last500Days') else: model.PredictionResultsSave(modelDescription, True, True) model.PredictionResultsPlot(modelDescription, True, False)