def DownloadAndGraphStocks(tickerList: list):
    """Load, analyse and chart every ticker in *tickerList*.

    For each ticker whose history loads, statistics and predictions are
    computed and one chart per look-back window (90, 180, 365, 2190 and
    4380 days) is saved to file.  Predictions are only drawn on windows
    shorter than 1000 days.
    """
    graphWindows = [90, 180, 365, 2190, 4380]
    for ticker in tickerList:
        prices = PricingData(ticker)
        print('Loading ' + ticker)
        if not prices.LoadHistory(requestedEndDate=GetTodaysDate()):
            continue  # nothing to chart for this ticker

        print('Calcualting stats ' + ticker)
        # NormalizePrices is called before and after the prediction step;
        # presumably the second call reverts the first -- TODO confirm.
        prices.NormalizePrices()
        prices.CalculateStats()
        prices.PredictPrices(2, 15)
        prices.NormalizePrices()
        # NOTE: SaveStatsToFile is not called here; results are only graphed.

        psnap = prices.GetCurrentPriceSnapshot()
        deviations = (str(round(psnap.fiveDayDeviation * 100, 2)) + '/' +
                      str(round(psnap.fifteenDayDeviation * 100, 2)) + '% ')
        priceRange = '/'.join(
            [str(psnap.low), str(psnap.nextDayTarget), str(psnap.high)])
        snapshotDay = str(psnap.snapShotDate)[:10]
        titleStatistics = ' 5/15 dev: ' + deviations + priceRange + ' ' + snapshotDay

        print('Graphing ' + ticker + ' ' + snapshotDay)
        for days in graphWindows:
            prices.GraphData(
                endDate=None,
                daysToGraph=days,
                graphTitle=ticker + '_days' + str(days) + ' ' + titleStatistics,
                includePredictions=(days < 1000),
                saveToFile=True,
                fileNameSuffix=str(days).zfill(4) + 'd',
                trimHistoricalPredictions=False)
def CalculatePriceCorrelation(tickerList: list):
    """Write pairwise price correlations for the last 365 days.

    The 'Average' price history of every ticker that loads is collected
    into one frame, its correlation matrix is saved to
    ``data/_priceCorrelations.csv``, and for each ticker the ten rows with
    the smallest correlation values are printed and written to
    ``data/_priceCorrelationTop10.txt``.

    Tickers whose history could not be loaded are skipped in the summary
    instead of raising ``KeyError``.
    """
    datafileName = 'data/_priceCorrelations.csv'
    summaryfileName = 'data/_priceCorrelationTop10.txt'
    result = pandas.DataFrame()
    # Read the clock once so start and end cannot straddle midnight.
    today = datetime.datetime.now().date()
    startDate = str(today + datetime.timedelta(days=-365))
    endDate = str(today)
    for ticker in tickerList:
        prices = PricingData(ticker)
        print('Loading ' + ticker)
        if prices.LoadHistory(True):
            prices.TrimToDateRange(startDate, endDate)
            prices.NormalizePrices()
            result[ticker] = prices.GetPriceHistory(['Average'])
    result = result.corr()
    result.to_csv(datafileName)
    # The context manager closes the file even if a write fails.
    with open(summaryfileName, 'w') as summary:
        for ticker in tickerList:
            if ticker not in result.columns:
                # History failed to load, so there is no column to rank by.
                print('Skipping ' + ticker + ' (no price history loaded)')
                continue
            topTen = result.nsmallest(10, ticker)
            print(topTen[ticker])
            summary.write(ticker + '\n')
            summary.write(topTen[ticker].to_string(header=True, index=True) + '\n')
            summary.write('\n')
def CalculatePriceCorrelation(tickerList: list):
    """Write pairwise price correlations for the last 365 days.

    The 'Average' price history of every ticker that loads is outer-joined
    into one frame (one column per ticker), its correlation matrix is saved
    to ``data/_priceCorrelations.csv``, and for each ticker the ten rows
    with the smallest correlation values are printed and written to
    ``data/_priceCorrelationTop10.txt``.

    If no ticker loads, a message is printed and nothing is written.
    Tickers that failed to load are skipped in the summary.
    """
    datafileName = 'data/_priceCorrelations.csv'
    summaryfileName = 'data/_priceCorrelationTop10.txt'
    result = None
    startDate = str(AddDays(GetTodaysDate(), -365))
    endDate = str(GetTodaysDate())
    for ticker in tickerList:
        prices = PricingData(ticker)
        print('Loading ' + ticker)
        if prices.LoadHistory(requestedEndDate=GetTodaysDate()):
            prices.TrimToDateRange(startDate, endDate)
            prices.NormalizePrices()
            x = prices.GetPriceHistory(['Average'])
            # Index labels become strings and the column takes the ticker name
            # so the per-ticker frames can be joined side by side.
            x.rename(index=str, columns={"Average": ticker}, inplace=True)
            result = x if result is None else result.join(x, how='outer')
    if result is None:
        # Previously this fell through to None.corr() and raised AttributeError.
        print('No price history could be loaded; nothing to correlate.')
        return
    result = result.corr()
    result.to_csv(datafileName)
    # The context manager closes the file even if a write fails.
    with open(summaryfileName, 'w') as summary:
        for ticker in tickerList:
            if ticker not in result.columns:
                print('Skipping ' + ticker + ' (no price history loaded)')
                continue
            topTen = result.nsmallest(10, ticker)
            print(topTen[ticker])
            summary.write(ticker + '\n')
            summary.write(topTen[ticker].to_string(header=True, index=True) + '\n')
            summary.write('\n')
    print(
        'Intended to create stability, in practice, this is a great way to pair well performing stocks with poor performing or volatile stocks.'
    )
def SampleCNN(ticker: str):
    """Plot ten sample/target image pairs from *ticker*'s price history.

    Original note: the CNN treats price data as a picture and anticipates
    the next picture.  For each of ten iterations two charts of the
    'Average' column are produced: a "Sample image" using an 80-day window
    and a "Target image" using a 10-day window, saved under
    ``dataFolder + 'samples/'``.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if prices.LoadHistory():
        prices.NormalizePrices()
        window_size = 80
        target_size = 10
        daysInTraining = 800
        sampleData = prices.GetPriceHistory()
        endDate = sampleData.index.max()
        cuttoffDate = endDate - BDay(window_size)
        startDate = cuttoffDate - BDay(daysInTraining)
        # Was 'samples\CNNsampleLearning': '\C' is an invalid escape sequence
        # and did not match the forward-slash paths used for the saved charts.
        print(dataFolder + 'samples/CNNsampleLearning', startDate, cuttoffDate, endDate)
        for i in range(0, 10):
            # Each iteration starts one full window (in business days) later.
            d1 = startDate + BDay(i * window_size)
            d2 = d1 + BDay(target_size)
            print(d1, d2, window_size, target_size)
            plot.PlotDataFrameDateRange(
                sampleData[['Average']], d1, window_size,
                'Sample image ' + str(i), 'Date', 'Price',
                dataFolder + 'samples/CNN' + str(i) + 'Sample')
            plot.PlotDataFrameDateRange(
                sampleData[['Average']], d2, target_size,
                'Target image ' + str(i), 'Date', 'Price',
                dataFolder + 'samples/CNN' + str(i) + 'Target')
def DownloadAndGraphStocks(tickerList: list):
    """Load, analyse, save and chart every ticker in *tickerList*.

    For each ticker whose history loads, statistics and predictions are
    computed, the stats are saved to file, and one chart per look-back
    window (90, 180, 365, 2190 and 4380 days) is saved.  Predictions are
    only drawn on windows shorter than 1000 days.
    """
    graphWindows = [90, 180, 365, 2190, 4380]
    for ticker in tickerList:
        prices = PricingData(ticker)
        print('Loading ' + ticker)
        if not prices.LoadHistory(True):
            continue  # nothing to chart for this ticker

        print('Calcualting stats ' + ticker)
        # NormalizePrices is called before and after the prediction step;
        # presumably the second call reverts the first -- TODO confirm.
        prices.NormalizePrices()
        prices.CalculateStats()
        prices.PredictPrices(2, 15)
        prices.NormalizePrices()
        prices.SaveStatsToFile(True)

        psnap = prices.GetCurrentPriceSnapshot()
        deviations = (str(round(psnap.fiveDayDeviation * 100, 2)) + '/' +
                      str(round(psnap.fifteenDayDeviation * 100, 2)) + '% ')
        priceRange = '/'.join(
            [str(psnap.low), str(psnap.nextDayTarget), str(psnap.high)])
        snapshotDay = str(psnap.snapshotDate)[:10]
        titleStatistics = ' 5/15 dev: ' + deviations + priceRange + ' ' + snapshotDay

        print('Graphing ' + ticker + ' ' + snapshotDay)
        for days in graphWindows:
            chartTitle = ticker + '_days' + str(days) + ' ' + titleStatistics
            fileSuffix = str(days).zfill(4) + 'd'
            prices.GraphData(None, days, chartTitle, (days < 1000), True,
                             fileSuffix, trimHistoricalPredictions=False)
def CalculatePriceCorrelation(tickerList: list):
    """Write pairwise price correlations for the last 365 days.

    The 'Average' price history of every ticker that loads is outer-joined
    into one frame (one column per ticker), its correlation matrix is saved
    to ``data/_priceCorrelations.csv``, and for each ticker the ten rows
    with the smallest correlation values are printed and written to
    ``data/_priceCorrelationTop10.txt``.

    If no ticker loads, a message is printed and nothing is written.
    Tickers that failed to load are skipped in the summary.
    """
    datafileName = 'data/_priceCorrelations.csv'
    summaryfileName = 'data/_priceCorrelationTop10.txt'
    result = None
    # Read the clock once so start and end cannot straddle midnight.
    today = datetime.datetime.now().date()
    startDate = str(today + datetime.timedelta(days=-365))
    endDate = str(today)
    for ticker in tickerList:
        prices = PricingData(ticker)
        print('Loading ' + ticker)
        if prices.LoadHistory():
            prices.TrimToDateRange(startDate, endDate)
            prices.NormalizePrices()
            x = prices.GetPriceHistory(['Average'])
            # Index labels become strings and the column takes the ticker name
            # so the per-ticker frames can be joined side by side.
            x.rename(index=str, columns={"Average": ticker}, inplace=True)
            result = x if result is None else result.join(x, how='outer')
    if result is None:
        # Previously this fell through to None.corr() and raised AttributeError.
        print('No price history could be loaded; nothing to correlate.')
        return
    result = result.corr()
    result.to_csv(datafileName)
    # The context manager closes the file even if a write fails.
    with open(summaryfileName, 'w') as summary:
        for ticker in tickerList:
            if ticker not in result.columns:
                print('Skipping ' + ticker + ' (no price history loaded)')
                continue
            topTen = result.nsmallest(10, ticker)
            print(topTen[ticker])
            summary.write(ticker + '\n')
            summary.write(topTen[ticker].to_string(header=True, index=True) + '\n')
            summary.write('\n')
def DownloadAndSaveStocksWithStats(tickerList: list):
    """Load each ticker's history up to today, compute stats and save them.

    Tickers whose history fails to load are skipped.  Stats are saved
    without predictions and with verbose output enabled.
    """
    for symbol in tickerList:
        pricing = PricingData(symbol)
        print('Loading ' + symbol)
        loaded = pricing.LoadHistory(requestedEndDate=GetTodaysDate())
        if not loaded:
            continue
        print('Calcualting stats ' + symbol)
        pricing.CalculateStats()
        pricing.SaveStatsToFile(includePredictions=False, verbose=True)
def RunPredictions(ticker: str = '^SPX', numberOfLearningPasses: int = 750):
    """Run prediction methods 0-2 for 4, 20 and 60 days forward.

    Per the original note, the three methods are Linear, LSTM and CNN.
    History is trimmed to 1/1/1950 - 3/1/2018 and normalized before the
    module-level ``PredictPrices`` is invoked for every combination.
    Nothing happens if the history cannot be loaded.
    """
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if not prices.LoadHistory():
        return
    prices.TrimToDateRange('1/1/1950', '3/1/2018')
    prices.NormalizePrices()
    for daysForward in (4, 20, 60):
        for method in range(3):
            PredictPrices(prices, method, daysForward, numberOfLearningPasses)
def RunPredictions(ticker: str = '^SPX', numberOfLearningPasses: int = 750):
    """Run prediction methods 0-2 for 4, 20 and 60 days forward.

    ``CreateFolder(dataFolder)`` is called first, then history is loaded,
    trimmed to 1/1/1950 - 3/1/2018 and normalized before the module-level
    ``PredictPrices`` is invoked for every (days forward, method)
    combination.  Nothing is predicted if the history cannot be loaded.
    """
    prices = PricingData(ticker)
    CreateFolder(dataFolder)
    print('Loading ' + ticker)
    if not prices.LoadHistory():
        return
    prices.TrimToDateRange('1/1/1950', '3/1/2018')
    prices.NormalizePrices()
    for daysForward in (4, 20, 60):
        for method in range(3):
            PredictPrices(prices, method, daysForward, numberOfLearningPasses)
def GraphTimePeriod(ticker: str, endDate: str, days: int):
    """Save a chart of *ticker* covering *days* days ending at *endDate*.

    The chart is drawn without predictions, with no custom title or file
    name suffix, and is saved to file.  Nothing is drawn if the history
    cannot be loaded.
    """
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if prices.LoadHistory():
        prices.GraphData(endDate=endDate,
                         daysToGraph=days,
                         graphTitle=None,
                         includePredictions=False,
                         saveToFile=True,
                         fileNameSuffix=None)
        # Raw string: '\d' and '\c' are invalid escape sequences in a normal
        # literal.  The printed text is unchanged.
        print(r'Chart saved to \data\charts')
def SampleGraphs(ticker: str, daysInGraph: int):
    """Plot twenty sample OHLC windows of *daysInGraph* days for *ticker*.

    Starting from the last date in the price history, the reference date
    steps back in 10-business-day increments (offsets 0, 10, ..., 190) and
    one chart per date is saved under ``dataFolder + 'samples/'``.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if not prices.LoadHistory(True):
        return
    prices.NormalizePrices()
    sampleData = prices.GetPriceHistory()
    lastDate = sampleData.index[-1]
    windowLabel = str(daysInGraph)
    for offset in range(0, 200, 10):
        # Reference business day passed to the plot helper for this sample.
        sampleDate = lastDate - BDay(offset)
        plot.PlotDataFrameDateRange(
            sampleData[['Open', 'High', 'Low', 'Close']],
            sampleDate,
            daysInGraph,
            'Sample window ' + windowLabel,
            'Date',
            'Price',
            dataFolder + 'samples/sample' + str(offset) + '_' + windowLabel)
def OpportunityFinder(tickerList: list):
    """Classify tickers as over bought, over sold or high deviation.

    Existing ``.txt`` and ``.png`` files under ``data/dailypicks/`` are
    deleted first.  For every ticker whose history loads, the current
    price snapshot is compared against its channel and deviation
    thresholds:

    * low above channelHigh         -> over bought
    * high below channelLow         -> over sold (60-day chart saved)
    * fiveDayDeviation above 0.0275 -> high deviation (60-day chart saved)

    The three lists are printed and written to
    ``data/dailypicks/_summary.txt``.
    """
    outputFolder = 'data/dailypicks/'
    summaryFile = '_summary.txt'
    overBoughtList = []
    oversoldList = []
    highDeviationList = []
    # Clear out the previous run's output.
    for root, dirs, files in os.walk(outputFolder):
        for fileName in files:
            if fileName.endswith(('.txt', '.png')):
                os.unlink(os.path.join(root, fileName))
    for ticker in tickerList:
        prices = PricingData(ticker)
        print('Checking ' + ticker)
        if prices.LoadHistory(True):
            prices.CalculateStats()
            psnap = prices.GetCurrentPriceSnapshot()
            # Was str(snapshotDate): an undefined name that raised NameError.
            # NOTE(review): attribute assumed to be psnap.snapshotDate, matching
            # the other positional-API code in this file -- confirm the spelling.
            titleStatistics = ' 5/15 dev: ' + str(
                round(psnap.fiveDayDeviation * 100, 2)) + '/' + str(
                    round(psnap.fifteenDayDeviation * 100, 2)) + '% ' + str(
                        psnap.low) + '/' + str(
                            psnap.nextDayTarget) + '/' + str(
                                psnap.high) + str(psnap.snapshotDate)
            if psnap.low > psnap.channelHigh:
                overBoughtList.append(ticker)
            if psnap.high < psnap.channelLow:
                oversoldList.append(ticker)
                prices.GraphData(None, 60, ticker + ' 60d ' + titleStatistics,
                                 False, True, '60d', outputFolder)
            if psnap.fiveDayDeviation > .0275:
                highDeviationList.append(ticker)
                prices.GraphData(None, 60, ticker + ' 60d ' + titleStatistics,
                                 False, True, '60d', outputFolder)
    print('Over bought:')
    print(overBoughtList)
    print('Over sold:')
    print(oversoldList)
    print('High deviation:')
    print(highDeviationList)
    # The context manager closes the summary even if a write fails.
    with open(outputFolder + summaryFile, 'w') as summary:
        summary.write('Over bought:\n')
        for t in overBoughtList:
            summary.write(t + '\n')
        summary.write('\nOver sold:\n')
        for t in oversoldList:
            summary.write(t + '\n')
        summary.write('\nHigh deviation:\n')
        for t in highDeviationList:
            summary.write(t + '\n')
def PlotPrediction(ticker: str = '^SPX',
                   predictionMethod: int = 0,
                   daysToGraph: int = 60,
                   daysForward: int = 5,
                   learnhingEpochs: int = 500):
    """Predict prices for *ticker*, chart them and save the stats.

    The chart covers *daysToGraph* days, includes predictions and is saved
    to file; the stats are saved afterwards.  Nothing happens beyond the
    initial message if the history cannot be loaded.
    """
    print('Plotting predictions for ' + ticker)
    prices = PricingData(ticker)
    if not prices.LoadHistory(True):
        return
    # NormalizePrices brackets the prediction step; presumably the second
    # call reverts the first -- TODO confirm.
    prices.NormalizePrices()
    prices.PredictPrices(predictionMethod, daysForward, learnhingEpochs)
    prices.NormalizePrices()
    daysLabel = str(daysToGraph) + 'days'
    prices.GraphData(None, daysToGraph, ticker + ' ' + daysLabel, True, True,
                     daysLabel)
    prices.SaveStatsToFile(True)
def SampleLSTM(ticker: str):
    """Plot one learning window and one target window for *ticker*.

    Two charts of the 'Average' column are saved under
    ``dataFolder + 'samples/'``: a 200-day "learning" chart referenced to
    the cutoff date (15 business days before the last date in the history)
    and a 15-day "target" chart referenced to the last date.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    samplesFolder = dataFolder + 'samples'
    CreateFolder(samplesFolder)
    if not prices.LoadHistory(True):
        return
    prices.NormalizePrices()
    daysInTarget = 15
    daysInTraining = 200
    sampleData = prices.GetPriceHistory()
    endDate = sampleData.index.max()
    cuttoffDate = endDate - BDay(daysInTarget)
    startDate = cuttoffDate - BDay(daysInTraining)
    print(samplesFolder + '/LSTMsampleLearning', startDate, cuttoffDate, endDate)
    # (reference date, window length, chart title, output file stem)
    chartSpecs = [
        (cuttoffDate, daysInTraining, 'Learn from this series of days',
         'LSTMLearning'),
        (endDate, daysInTarget,
         'Predict what happens after this series of days', 'LSTMTarget'),
    ]
    for refDate, windowDays, title, fileStem in chartSpecs:
        plot.PlotDataFrameDateRange(sampleData[['Average']], refDate,
                                    windowDays, title, 'Date', 'Price',
                                    samplesFolder + '/' + fileStem)
def PlotAnnualPerformance(ticker: str = '^SPX'):
    """Plot yearly and monthly 'Average' prices and the yearly change.

    The first 'Average' value of each year (and of each year/month) is
    taken from the price history; the year-over-year percentage change is
    derived from the yearly series.  All three frames are plotted and the
    mean yearly change is printed as a percentage.
    """
    print('Annual performance rate for ' + ticker)
    prices = PricingData(ticker)
    if not prices.LoadHistory(True):
        return
    history = prices.GetPriceHistory(['Average'])
    years = history.index.year
    months = history.index.month
    yearly = history.groupby([years]).first()
    yearlyChange = yearly.pct_change(1)
    monthly = history.groupby([years, months]).first()
    plot = PlotHelper()
    charts = [
        (yearly, 'Yearly'),
        (monthly, 'Monthly'),
        (yearlyChange, 'Yearly Percentage Change'),
    ]
    for frame, chartTitle in charts:
        plot.PlotDataFrame(frame, title=chartTitle, adjustScale=False)
    meanChangePercent = yearlyChange.mean().values * 100
    print('Average annual change from ', prices.historyStartDate, ' to ',
          prices.historyEndDate, ': ', meanChangePercent, '%')
def TestPredictionModels(ticker: str = '^SPX',
                         numberOfLearningPasses: int = 300):
    """Compare prediction methods 0-4 at 4, 20 and 60 days forward.

    For every combination the predictions are joined with the price
    history, the absolute percentage deviation between 'Average' and
    'estAverage' is computed, and the results are written to a CSV plus
    three charts (full range, last 160 days, last 500 days) under
    ``dataFolder``.  History is trimmed to 1/1/2000 - 3/1/2018.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    if not prices.LoadHistory():
        return
    prices.TrimToDateRange('1/1/2000', '3/1/2018')
    print('Loading ' + ticker)
    # (window length, chart title suffix, file name suffix)
    recentWindows = [
        (160, '_last160ays', '_last160Days'),
        (500, '_last500Days', '_last500Days'),
    ]
    for daysForward in [4, 20, 60]:
        for predictionMethod in range(0, 5):
            modelDescription = ''.join([
                ticker,
                '_method', str(predictionMethod),
                '_epochs', str(numberOfLearningPasses),
                '_daysforward', str(daysForward),
            ])
            print('Predicting ' + str(daysForward) + ' days using method ' +
                  modelDescription)
            prices.PredictPrices(predictionMethod, daysForward,
                                 numberOfLearningPasses)
            predDF = prices.pricePredictions.copy()
            predDF = predDF.join(prices.GetPriceHistory())
            relativeError = (predDF['Average'] -
                             predDF['estAverage']) / predDF['Average']
            predDF['PercentageDeviation'] = abs(relativeError)
            # Average of the last 25% to account for training.
            tailLength = round(predDF.shape[0] / 4)
            averageDeviation = predDF['PercentageDeviation'].tail(
                tailLength).mean()
            print('Average deviation: ', averageDeviation * 100, '%')
            outputStem = dataFolder + modelDescription
            predDF.to_csv(outputStem + '.csv')
            plot.PlotDataFrame(predDF[['estAverage', 'Average']],
                               modelDescription, 'Date', 'Price', True,
                               outputStem)
            for windowDays, titleSuffix, fileSuffix in recentWindows:
                plot.PlotDataFrameDateRange(
                    predDF[['Average', 'estAverage']], None, windowDays,
                    modelDescription + titleSuffix, 'Date', 'Price',
                    outputStem + fileSuffix)
def GraphTimePeriod(ticker: str, endDate: datetime, days: int):
    """Graph *days* days of *ticker* ending at *endDate*.

    ``GraphData`` is called positionally with
    ``(endDate, days, None, False, True, None)``.  Nothing is drawn if the
    history cannot be loaded.
    """
    pricing = PricingData(ticker)
    print('Loading ' + ticker)
    if not pricing.LoadHistory():
        return
    pricing.GraphData(endDate, days, None, False, True, None)
def TrainTickerRaw(ticker: str = '^SPX',
                   UseLSTM: bool = True,
                   prediction_target_days: int = 5,
                   epochs: int = 500,
                   usePercentages: bool = False,
                   hidden_layer_size: int = 512,
                   dropout: bool = True,
                   dropout_rate: float = 0.01,
                   learning_rate: float = 2e-5):
    """Train an LSTM or CNN ``StockPredictionNN`` on *ticker* and report results.

    History is trimmed to 1/1/2000 - 3/1/2018, then either converted to
    percentages or normalized.  The LSTM path uses a window of 1 on the
    'Average' field with batch size 128; the CNN path uses a window of
    ``16 * prediction_target_days`` on High/Low/Open/Close with batch size
    64.  After training, results are either unravelled from percentages
    and written/plotted here, or saved and plotted by the model itself.
    Nothing happens if the history cannot be loaded.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if not prices.LoadHistory(True):
        return
    prices.TrimToDateRange('1/1/2000', '3/1/2018')
    if usePercentages:
        # Original note: percentages don't work well, suspected because small
        # errors have a huge impact when you revert back to the original
        # prices and they roll forward.
        prices.ConvertToPercentages()
    else:
        prices.NormalizePrices()
    prices.CalculateStats()
    model = StockPredictionNN(baseModelName=ticker, UseLSTM=UseLSTM)

    # Only these four settings differ between the two architectures.
    if UseLSTM:
        window_size = 1
        architectureTag = '_LSTM'
        fieldNames = ['Average']
        batchSize = 128
    else:  # CNN
        window_size = 16 * prediction_target_days
        architectureTag = '_CNN'
        fieldNames = ['High', 'Low', 'Open', 'Close']
        batchSize = 64

    modelDescription = ticker + architectureTag
    modelDescription += '_epochs' + str(epochs) + '_histwin' + str(
        window_size) + '_daysforward' + str(prediction_target_days)
    if usePercentages:
        modelDescription += '_percentages'

    model.LoadSource(sourceDF=prices.GetPriceHistory(),
                     FieldList=fieldNames,
                     window_size=window_size)
    model.LoadTarget(targetDF=None,
                     prediction_target_days=prediction_target_days)
    model.MakeBatches(batch_size=batchSize, train_test_split=.93)
    model.BuildModel(layer_count=1,
                     hidden_layer_size=hidden_layer_size,
                     dropout=dropout,
                     dropout_rate=dropout_rate,
                     learning_rate=learning_rate)
    model.DisplayModel()
    model.Train(epochs=epochs)
    model.Predict(True)
    model.Save()

    if not usePercentages:
        model.PredictionResultsSave(modelDescription, True, True)
        model.PredictionResultsPlot(modelDescription, True, False)
        return

    predDF = model.GetTrainingResults(True, True)
    predDF = predDF.loc[:, ['Average', 'Average_Predicted']]
    print('Unraveling percentages..')
    predDF['Average_Predicted'].fillna(0, inplace=True)
    # Seed the first row, then compound each row onto the previous one.
    predDF.iloc[0] = prices.CTPFactor['Average']
    for i in range(1, predDF.shape[0]):
        predDF.iloc[i] = (1 + predDF.iloc[i]) * predDF.iloc[i - 1]
    print(predDF)
    predDF['PercentageDeviation'] = abs(
        (predDF['Average'] - predDF['Average_Predicted']) / predDF['Average'])
    outputStem = dataFolder + modelDescription
    predDF.to_csv(outputStem + '.csv')
    plot.PlotDataFrame(predDF[['Average', 'Average_Predicted']],
                       modelDescription, 'Date', 'Price', True, outputStem)
    # (window length, chart title suffix, file name suffix)
    for windowDays, titleSuffix, fileSuffix in [
            (160, '_last160ays', '_last160Days'),
            (500, '_last500Days', '_last500Days')]:
        plot.PlotDataFrameDateRange(
            predDF[['Average', 'Average_Predicted']], None, windowDays,
            modelDescription + titleSuffix, 'Date', 'Price',
            outputStem + fileSuffix)
def OpportunityFinder(tickerList: list):
    """Build the daily-picks table and charts for *tickerList*.

    Existing ``.png`` files under ``data/dailypicks/`` are deleted first.
    For every ticker whose history loads, five-day-average prices from
    730, 365, 180, 90, 60 and 30 days ago are compared with the current
    two-day average.  Tickers flagged OverBought, OverSold or
    HighDeviation get a 60-day chart.  A row of prices, channel/EMA
    values, price changes and gains is added to the table only when every
    price used as a divisor is positive; otherwise the raw values are
    printed for diagnosis.  The table is printed and saved to
    ``data/dailypicks/_DailyPicks.csv``.
    """
    outputFolder = 'data/dailypicks/'
    summaryFile = '_DailyPicks.csv'
    candidates = pd.DataFrame(columns=[
        'Ticker', 'hp2Year', 'hp1Year', 'hp6mo', 'hp3mo', 'hp2mo', 'hp1mo',
        'currentPrice', 'channelHigh', 'channelLow', 'shortEMA', 'longEMA',
        '2yearPriceChange', '1yearPriceChange', '6moPriceChange',
        '3moPriceChange', '2moPriceChange', '1moPriceChange', 'dailyGain',
        'monthlyGain', 'monthlyLossStd', 'Comments'
    ])
    candidates.set_index(['Ticker'], inplace=True)
    # Remove charts left over from the previous run.
    for root, dirs, files in os.walk(outputFolder):
        for fileName in files:
            if fileName.endswith('.png'):
                os.unlink(os.path.join(root, fileName))
    # Look-back offsets in days, ordered to match the hp* columns.
    lookbackDays = (730, 365, 180, 90, 60, 30)
    for ticker in tickerList:
        prices = PricingData(ticker)
        currentDate = GetTodaysDate()
        print('Checking ' + ticker)
        if prices.LoadHistory(requestedEndDate=currentDate):
            prices.CalculateStats()
            historicalPrices = [
                prices.GetPriceSnapshot(AddDays(currentDate, -days)).fiveDayAverage
                for days in lookbackDays
            ]
            psnap = prices.GetCurrentPriceSnapshot()
            currentPrice = psnap.twoDayAverage
            Comments = ''
            if psnap.low > psnap.channelHigh:
                Comments += 'OverBought; '
            if psnap.high < psnap.channelLow:
                Comments += 'OverSold; '
            if psnap.fiveDayDeviation > .0275:
                Comments += 'HighDeviation; '
            if Comments != '':
                titleStatistics = ' 5/15 dev: ' + str(
                    round(psnap.fiveDayDeviation * 100, 2)) + '/' + str(
                        round(psnap.fifteenDayDeviation * 100,
                              2)) + '% ' + str(psnap.low) + '/' + str(
                                  psnap.nextDayTarget) + '/' + str(
                                      psnap.high) + str(psnap.snapShotDate)
                prices.GraphData(None, 60, ticker + ' 60d ' + titleStatistics,
                                 False, True, '60d', outputFolder)
            # Every historical price is used as a divisor below, so all of
            # them must be positive.  The original check skipped hp3mo.
            valuesLoaded = currentPrice > 0 and all(
                price > 0 for price in historicalPrices)
            if valuesLoaded:
                priceChanges = [(currentPrice / price) - 1
                                for price in historicalPrices]
                candidates.loc[ticker] = historicalPrices + [
                    currentPrice, psnap.channelHigh, psnap.channelLow,
                    psnap.shortEMA, psnap.longEMA
                ] + priceChanges + [
                    psnap.dailyGain, psnap.monthlyGain, psnap.monthlyLossStd,
                    Comments
                ]
            else:
                # Now includes hp3mo so the rejected value is visible.
                print(ticker, currentPrice, *historicalPrices)
    print(candidates)
    candidates.to_csv(outputFolder + summaryFile)