Пример #1
0
def CompareModels(modelOneName: str,
                  modelOneFunction,
                  modelTwoName: str,
                  modelTwoFunction,
                  ticker: str,
                  startDate: str,
                  durationInYears: int,
                  portfolioSize: int = 10000):
    """Run two models through RunModel and plot their daily values together.

    Both models receive the same ticker, start date, duration and portfolio
    size. Nothing is plotted unless both results contain at least one row.

    Args:
        modelOneName: Name passed to RunModel and used as the left column suffix.
        modelOneFunction: Model callable handed to RunModel for the first run.
        modelTwoName: Name passed to RunModel and used as the right column suffix.
        modelTwoFunction: Model callable handed to RunModel for the second run.
        ticker: Symbol passed to RunModel and shown in the plot title.
        startDate: Start date passed through to RunModel unchanged.
        durationInYears: Duration passed through to RunModel unchanged.
        portfolioSize: Portfolio size passed through to RunModel unchanged.
    """

    def _dailyValues(name, function):
        # Identical settings for both runs so the results are comparable.
        return RunModel(name,
                        function,
                        ticker,
                        startDate,
                        durationInYears,
                        portfolioSize,
                        saveHistoryToFile=False,
                        returndailyValues=True,
                        verbose=False)

    firstValues = _dailyValues(modelOneName, modelOneFunction)
    secondValues = _dailyValues(modelTwoName, modelTwoFunction)
    if firstValues.shape[0] <= 0 or secondValues.shape[0] <= 0:
        return
    # Suffixes keep overlapping column names apart after the join.
    combined = firstValues.join(secondValues,
                                lsuffix='_' + modelOneName,
                                rsuffix='_' + modelTwoName)
    chart = PlotHelper()
    chart.PlotDataFrame(combined, ticker + ' Model Comparison', 'Date',
                        'Value')
Пример #2
0
def SampleCNN(ticker: str):
    """Plot ten sample/target window pairs of the 'Average' price for *ticker*.

    The original author's note: the CNN treats price data as a picture and
    anticipates the next picture. Each iteration plots an 80-day "sample"
    window and a 10-day "target" window, saving both under
    ``dataFolder + 'samples/'``.

    Nothing is plotted when ``prices.LoadHistory()`` returns a falsy value.

    Args:
        ticker: Symbol whose price history is loaded through PricingData.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if not prices.LoadHistory():
        return
    prices.NormalizePrices()
    window_size = 80
    target_size = 10
    daysInTraining = 800
    sampleData = prices.GetPriceHistory()
    endDate = sampleData.index.max()
    cuttoffDate = endDate - BDay(window_size)
    startDate = cuttoffDate - BDay(daysInTraining)
    # Forward slash, like every other path here: '\C' is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    print(dataFolder + 'samples/CNNsampleLearning', startDate, cuttoffDate,
          endDate)
    for i in range(0, 10):
        # Each sample date sits one full window after the previous one.
        d1 = startDate + BDay(i * window_size)
        d2 = d1 + BDay(target_size)
        print(d1, d2, window_size, target_size)
        plot.PlotDataFrameDateRange(
            sampleData[['Average']], d1, window_size,
            'Sample image ' + str(i), 'Date', 'Price',
            dataFolder + 'samples/CNN' + str(i) + 'Sample')
        plot.PlotDataFrameDateRange(
            sampleData[['Average']], d2, target_size,
            'Target image ' + str(i), 'Date', 'Price',
            dataFolder + 'samples/CNN' + str(i) + 'Target')
Пример #3
0
def CreateAdditionalGraph():
    """Plot the last 500 and 1000 days for every prediction CSV under dataFolder.

    Walks ``dataFolder`` recursively. Each ``.csv`` file is read with its
    first column as a parsed date index, and its 'Average' and
    'Average_Predicted' columns are plotted twice. Graphs are saved to
    ``dataFolder + <file name without .csv> + '_last500Days'`` and
    ``'_last1000Days'``.

    CSV files without both columns are reported and skipped, so one unrelated
    file no longer aborts the whole walk with a KeyError.
    """
    plot = PlotHelper()
    plotColumns = ['Average', 'Average_Predicted']
    for root, dirs, files in os.walk(dataFolder):
        for f in files:
            if not f.endswith('.csv'):
                continue
            predDF = pandas.read_csv(os.path.join(root, f), index_col=0, parse_dates=True, na_values=['nan'])
            modelDescription = f[:-4]  # file name without the '.csv' suffix
            print(modelDescription)
            missingColumns = [c for c in plotColumns if c not in predDF.columns]
            if missingColumns:
                print('Skipping ' + f + ', missing columns: ' + ', '.join(missingColumns))
                continue
            # Titles now read '...Days', matching the saved file names.
            plot.PlotDataFrameDateRange(predDF[plotColumns], None, 500, modelDescription + '_last500Days', 'Date', 'Price', dataFolder + modelDescription + '_last500Days')
            plot.PlotDataFrameDateRange(predDF[plotColumns], None, 1000, modelDescription + '_last1000Days', 'Date', 'Price', dataFolder + modelDescription + '_last1000Days')
Пример #4
0
def SampleGraphs(ticker: str, daysInGraph: int):
    """Save twenty Open/High/Low/Close sample plots for *ticker*.

    Starting at the last date in the price history, steps back 10 business
    days at a time (offsets 0, 10, ..., 190). Each step plots a window of
    ``daysInGraph`` days to ``dataFolder + 'samples/sample<offset>_<daysInGraph>'``.

    Nothing is plotted when ``prices.LoadHistory(True)`` returns a falsy value.

    Args:
        ticker: Symbol whose price history is loaded through PricingData.
        daysInGraph: Window length handed to PlotDataFrameDateRange.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if not prices.LoadHistory(True):
        return
    prices.NormalizePrices()
    history = prices.GetPriceHistory()
    lastDate = history.index[-1]
    windowLabel = str(daysInGraph)
    for offset in range(0, 200, 10):
        # Pick the business day `offset` days before the last known date.
        sampleDate = lastDate - BDay(offset)
        targetFile = dataFolder + 'samples/sample' + str(offset) + '_' + windowLabel
        plot.PlotDataFrameDateRange(history[['Open', 'High', 'Low', 'Close']],
                                    sampleDate, daysInGraph,
                                    'Sample window ' + windowLabel,
                                    'Date', 'Price', targetFile)
def PlotModeldailyValue(ticker: str, startDate: str, durationInYears: int,
                        modelName: str, modelFunction,
                        portfolioSize: int = 10000):
    """Run one model and plot the daily values it returns.

    Nothing is plotted when the model's result has no rows.

    Args:
        ticker: Symbol passed to the model and shown in the plot title.
        startDate: Start date passed through to the model unchanged.
        durationInYears: Duration passed through to the model unchanged.
        modelName: Label used in the plot title.
        modelFunction: Callable invoked as ``modelFunction(ticker, startDate,
            durationInYears, portfolioSize, verbose=False,
            saveHistoryToFile=True, returndailyValues=True)``. Its result
            must expose ``shape``.
        portfolioSize: Fourth positional argument handed to the model.
            Defaults to 10000, the value that was previously hard-coded.
    """
    dailyValues = modelFunction(ticker,
                                startDate,
                                durationInYears,
                                portfolioSize,
                                verbose=False,
                                saveHistoryToFile=True,
                                returndailyValues=True)
    if dailyValues.shape[0] > 0:
        plot = PlotHelper()
        plot.PlotDataFrame(dailyValues,
                           modelName + ' Daily Value (' + ticker + ')',
                           'Date', 'Value')
Пример #6
0
def PlotModeldailyValue(modelName: str, modelFunction, ticker: str,
                        startDate: str, durationInYears: int):
    """Run one model through RunModel and plot the daily values it returns.

    The run uses a fixed portfolio size of 10000 and asks RunModel to save
    its history to file. Nothing is plotted when the result has no rows.

    Args:
        modelName: Name passed to RunModel and used in the plot title.
        modelFunction: Model callable handed to RunModel.
        ticker: Symbol passed to RunModel and shown in the plot title.
        startDate: Start date passed through to RunModel unchanged.
        durationInYears: Duration passed through to RunModel unchanged.
    """
    startingValue = 10000
    dailyValues = RunModel(modelName, modelFunction, ticker, startDate,
                           durationInYears, startingValue,
                           saveHistoryToFile=True,
                           returndailyValues=True,
                           verbose=False)
    if dailyValues.shape[0] <= 0:
        return
    chart = PlotHelper()
    chartTitle = modelName + ' Daily Value (' + ticker + ')'
    chart.PlotDataFrame(dailyValues, chartTitle, 'Date', 'Value')
Пример #7
0
def SampleLSTM(ticker: str):
    """Save one "learning" plot and one "target" plot of the 'Average' price.

    Creates ``dataFolder + 'samples'``, loads and normalizes the price
    history, then derives three dates from the latest date in the history:
    the end date, a cutoff 15 business days earlier, and a start 200 business
    days before the cutoff. The learning plot is requested at the cutoff date
    with 200 days, the target plot at the end date with 15 days.

    Nothing is plotted when ``prices.LoadHistory(True)`` returns a falsy value.

    Args:
        ticker: Symbol whose price history is loaded through PricingData.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    CreateFolder(dataFolder + 'samples')
    if not prices.LoadHistory(True):
        return
    prices.NormalizePrices()
    daysInTarget, daysInTraining = 15, 200
    history = prices.GetPriceHistory()
    endDate = history.index.max()
    cuttoffDate = endDate - BDay(daysInTarget)
    startDate = cuttoffDate - BDay(daysInTraining)
    print(dataFolder + 'samples/LSTMsampleLearning', startDate, cuttoffDate, endDate)
    sampleFolder = dataFolder + 'samples/'
    plot.PlotDataFrameDateRange(history[['Average']], cuttoffDate,
                                daysInTraining,
                                'Learn from this series of days',
                                'Date', 'Price', sampleFolder + 'LSTMLearning')
    plot.PlotDataFrameDateRange(history[['Average']], endDate,
                                daysInTarget,
                                'Predict what happens after this series of days',
                                'Date', 'Price', sampleFolder + 'LSTMTarget')
def PlotAnnualPerformance(ticker: str = '^SPX'):
    """Plot yearly and monthly 'Average' prices and the yearly percentage change.

    Takes the first 'Average' value of each year and of each (year, month),
    and the percentage change between consecutive yearly values. All three
    are plotted, then the mean yearly change is printed as a percentage.

    Nothing is plotted when ``prices.LoadHistory(True)`` returns a falsy value.

    Args:
        ticker: Symbol whose price history is loaded through PricingData.
    """
    print('Annual performance rate for ' + ticker)
    prices = PricingData(ticker)
    if not prices.LoadHistory(True):
        return
    averages = prices.GetPriceHistory(['Average'])
    dates = averages.index
    firstOfYear = averages.groupby([dates.year]).first()
    yearOverYear = firstOfYear.pct_change(1)
    firstOfMonth = averages.groupby([dates.year, dates.month]).first()
    chart = PlotHelper()
    charts = ((firstOfYear, 'Yearly'),
              (firstOfMonth, 'Monthly'),
              (yearOverYear, 'Yearly Percentage Change'))
    for frame, chartTitle in charts:
        chart.PlotDataFrame(frame, title=chartTitle, adjustScale=False)
    print('Average annual change from ', prices.historyStartDate, ' to ',
          prices.historyEndDate, ': ',
          yearOverYear.mean().values * 100, '%')
Пример #9
0
def TestPredictionModels(ticker: str = '^SPX',
                         numberOfLearningPasses: int = 300):
    """Run prediction methods 0-4 for 4, 20 and 60 days forward.

    For each combination this calls ``prices.PredictPrices``, joins the
    predictions with the price history, and prints the mean percentage
    deviation over the last quarter of rows. The joined frame is written to
    ``dataFolder + <description> + '.csv'`` and three graphs are saved: full
    range, last 160 days and last 500 days.

    Nothing runs when ``prices.LoadHistory()`` returns a falsy value.

    Args:
        ticker: Symbol whose price history is loaded through PricingData.
        numberOfLearningPasses: Passed to ``prices.PredictPrices`` and
            embedded in the output names as the epoch count.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    if not prices.LoadHistory():
        return
    prices.TrimToDateRange('1/1/2000', '3/1/2018')
    print('Loading ' + ticker)
    plotColumns = ['Average', 'estAverage']
    for daysForward in [4, 20, 60]:
        for predictionMethod in range(0, 5):
            modelDescription = (ticker + '_method' + str(predictionMethod) +
                                '_epochs' + str(numberOfLearningPasses) +
                                '_daysforward' + str(daysForward))
            print('Predicting ' + str(daysForward) +
                  ' days using method ' + modelDescription)
            prices.PredictPrices(predictionMethod, daysForward,
                                 numberOfLearningPasses)
            predDF = prices.pricePredictions.copy()
            predDF = predDF.join(prices.GetPriceHistory())
            predDF['PercentageDeviation'] = abs(
                (predDF['Average'] - predDF['estAverage']) /
                predDF['Average'])
            # Average of the last 25% to account for training.
            averageDeviation = predDF['PercentageDeviation'].tail(
                round(predDF.shape[0] / 4)).mean()
            print('Average deviation: ', averageDeviation * 100, '%')
            predDF.to_csv(dataFolder + modelDescription + '.csv')
            plot.PlotDataFrame(predDF[['estAverage', 'Average']],
                               modelDescription, 'Date', 'Price', True,
                               dataFolder + modelDescription)
            # Title previously read '_last160ays'; it now matches the file
            # name and the 500-day title below.
            plot.PlotDataFrameDateRange(
                predDF[plotColumns], None, 160,
                modelDescription + '_last160Days', 'Date', 'Price',
                dataFolder + modelDescription + '_last160Days')
            plot.PlotDataFrameDateRange(
                predDF[plotColumns], None, 500,
                modelDescription + '_last500Days', 'Date', 'Price',
                dataFolder + modelDescription + '_last500Days')
def CompareModels(ticker: str, startDate: str, durationInYears: int,
                  modelOneName: str, modelTwoName: str, modelOne, modelTwo,
                  portfolioSize: int = 10000):
    """Run two model callables over the same period and plot them together.

    Nothing is plotted unless both results contain at least one row.

    Args:
        ticker: Symbol passed to both models and shown in the plot title.
        startDate: Start date passed through to both models unchanged.
        durationInYears: Duration passed through to both models unchanged.
        modelOneName: Left column suffix applied when joining the results.
        modelTwoName: Right column suffix applied when joining the results.
        modelOne: Callable invoked as ``modelOne(ticker, startDate,
            durationInYears, portfolioSize, verbose=False,
            saveHistoryToFile=False, returndailyValues=True)``.
        modelTwo: Second callable, invoked the same way as ``modelOne``.
        portfolioSize: Fourth positional argument handed to both models.
            Defaults to 10000, the value that was previously hard-coded.
    """
    # Both models run before any emptiness check.
    results = [
        model(ticker,
              startDate,
              durationInYears,
              portfolioSize,
              verbose=False,
              saveHistoryToFile=False,
              returndailyValues=True) for model in (modelOne, modelTwo)
    ]
    m1, m2 = results
    if m1.shape[0] > 0 and m2.shape[0] > 0:
        m1 = m1.join(m2,
                     lsuffix='_' + modelOneName,
                     rsuffix='_' + modelTwoName)
        plot = PlotHelper()
        plot.PlotDataFrame(m1, ticker + ' Model Comparison', 'Date', 'Value')
Пример #11
0
def PredictPrices(prices:PricingData, predictionMethod:int=0, daysForward:int = 4, numberOfLearningPasses:int = 500):
    """Run one prediction method against *prices*, then save and plot the results.

    The result frame is written to ``dataFolder + <description> + '.csv'``.
    Three graphs are saved: the full range under ``'experiment/'``, and the
    last 160 and last 1000 days under ``dataFolder``.

    Args:
        prices: Price data. This function reads ``stockTicker`` and calls
            ``GetPriceHistory()``.
        predictionMethod: 0 = linear projection, 1 = LSTM, 2 = CNN.
        daysForward: Number of business days to predict ahead.
        numberOfLearningPasses: Epoch count for the LSTM/CNN training. The
            linear projection does not use it.

    Raises:
        AssertionError: If ``predictionMethod`` is outside 0..2.
    """
    assert(0 <= predictionMethod <= 2)
    plot = PlotHelper()
    if predictionMethod == 0:  # Linear projection
        print('Running Linear Projection model predicting ' + str(daysForward) + ' days...')
        modelDescription = prices.stockTicker + '_Linear_daysforward' + str(daysForward)
        predDF = prices.GetPriceHistory()
        predDF['Average'] = (predDF['Open'] + predDF['High'] + predDF['Low'] + predDF['Close'])/4
        d = predDF.index[-1]
        for i in range(0, daysForward):  # Add new days to the end for crystal ball predictions
            predDF.loc[d + BDay(i+1), 'Average_Predicted'] = 0
        # Ratio of the average daysForward ago to the average 2*daysForward
        # ago, applied to the average daysForward ago.
        predDF['PastSlope'] = predDF['Average'].shift(daysForward) / predDF['Average'].shift(daysForward*2)
        predDF['Average_Predicted'] = predDF['Average'].shift(daysForward) * predDF['PastSlope']
        predDF['PercentageDeviation'] = abs((predDF['Average']-predDF['Average_Predicted'])/predDF['Average'])
    else:
        learningModule = StockPredictionNN()
        SourceFieldList = ['High','Low','Open','Close']
        if predictionMethod == 1:  # LSTM learning
            print('Running LSTM model predicting ' + str(daysForward) + ' days...')
            window_size = 1
            modelDescription = prices.stockTicker + '_LSTM' + '_epochs' + str(numberOfLearningPasses) + '_histwin' + str(window_size) + '_daysforward' + str(daysForward)
            learningModule.LoadData(prices.GetPriceHistory(), window_size=window_size, prediction_target_days=daysForward, UseLSTM=True, SourceFieldList=SourceFieldList, batch_size=10, train_test_split=.93)
            learningModule.TrainLSTM(epochs=numberOfLearningPasses, learning_rate=2e-5, dropout_rate=0.8, gradient_clip_margin=4)
        elif predictionMethod == 2:  # CNN Learning
            print('Running CNN model predicting ' + str(daysForward) + ' days...')
            window_size = 16 * daysForward
            modelDescription = prices.stockTicker + '_CNN' + '_epochs' + str(numberOfLearningPasses) + '_histwin' + str(window_size) + '_daysforward' + str(daysForward)
            learningModule.LoadData(prices.GetPriceHistory(), window_size=window_size, prediction_target_days=daysForward, UseLSTM=False, SourceFieldList=SourceFieldList, batch_size=32, train_test_split=.93)
            learningModule.TrainCNN(epochs=numberOfLearningPasses)
        # NOTE(review): the code below reads 'PercentageDeviation', so
        # GetTrainingResults presumably returns that column - confirm.
        predDF = learningModule.GetTrainingResults(True, True)
    averageDeviation = predDF['PercentageDeviation'].tail(round(predDF.shape[0]/4)).mean()  # Average of the last 25% to account for training.
    print('Average deviation: ', averageDeviation * 100, '%')
    predDF = predDF.reindex(sorted(predDF.columns), axis=1)  # Sort columns alphabetical
    predDF.to_csv(dataFolder + modelDescription + '.csv')
    plotColumns = ['Average', 'Average_Predicted']
    plot.PlotDataFrame(predDF[plotColumns], modelDescription, 'Date', 'Price', True, 'experiment/' + modelDescription)
    # Titles previously read '..._last160ays' / '..._last1000ays'; they now
    # match the file names.
    plot.PlotDataFrameDateRange(predDF[plotColumns], None, 160, modelDescription + '_last160Days', 'Date', 'Price', dataFolder + modelDescription + '_last160Days')
    plot.PlotDataFrameDateRange(predDF[plotColumns], None, 1000, modelDescription + '_last1000Days', 'Date', 'Price', dataFolder + modelDescription + '_last1000Days')
Пример #12
0
def PredictPrices(prices: PricingData,
                  predictionMethod: int = 0,
                  daysForward: int = 5,
                  numberOfLearningPasses: int = 500):
    """Execute a prediction method: linear projection, LSTM or CNN.

    Per the original author's note, results are exported to the "experiment"
    sub folder. The code writes a CSV of actual and predicted data to
    ``dataFolder + <description> + '.csv'`` and saves three graphs: the full
    range under ``'experiment/'``, and the last 160 and last 1000 days under
    ``dataFolder``.

    Args:
        prices: Price data. This function reads ``stockTicker`` and calls
            ``GetPriceHistory()``.
        predictionMethod: 0 = linear projection, 1 = LSTM, 2 = CNN.
        daysForward: Number of business days to predict ahead.
        numberOfLearningPasses: Epoch count for the LSTM/CNN training. The
            linear projection does not use it.

    Raises:
        AssertionError: If ``predictionMethod`` is outside 0..2.
    """
    assert (0 <= predictionMethod <= 2)
    plot = PlotHelper()
    if predictionMethod == 0:  #Linear projection
        print('Running Linear Projection model predicting ' +
              str(daysForward) + ' days...')
        modelDescription = prices.stockTicker + '_Linear_daysforward' + str(
            daysForward)
        predDF = prices.GetPriceHistory()
        predDF['Average'] = (predDF['Open'] + predDF['High'] + predDF['Low'] +
                             predDF['Close']) / 4
        d = predDF.index[-1]
        #Add new days to the end for crystal ball predictions
        for i in range(0, daysForward):
            predDF.loc[d + BDay(i + 1), 'Average_Predicted'] = 0
        # Ratio of the average daysForward ago to the average 2*daysForward
        # ago, applied to the average daysForward ago.
        predDF['PastSlope'] = predDF['Average'].shift(
            daysForward) / predDF['Average'].shift(daysForward * 2)
        predDF['Average_Predicted'] = predDF['Average'].shift(
            daysForward) * predDF['PastSlope']
    else:
        SourceFieldList = ['High', 'Low', 'Open', 'Close']
        if predictionMethod == 1:  #LSTM learning
            print('Running LSTM model predicting ' + str(daysForward) +
                  ' days...')
            SourceFieldList = None
            UseLSTM = True
            window_size = 10
            modelDescription = prices.stockTicker + '_LSTM' + '_epochs' + str(
                numberOfLearningPasses) + '_histwin' + str(
                    window_size) + '_daysforward' + str(daysForward)
        elif predictionMethod == 2:  #CNN Learning
            print('Running CNN model predicting ' + str(daysForward) +
                  ' days...')
            UseLSTM = False
            window_size = 16 * daysForward
            modelDescription = prices.stockTicker + '_CNN' + '_epochs' + str(
                numberOfLearningPasses) + '_histwin' + str(
                    window_size) + '_daysforward' + str(daysForward)
        learningModule = StockPredictionNN(baseModelName=prices.stockTicker,
                                           UseLSTM=UseLSTM)
        learningModule.LoadSource(prices.GetPriceHistory(),
                                  FieldList=SourceFieldList,
                                  window_size=window_size)
        learningModule.LoadTarget(targetDF=None,
                                  prediction_target_days=daysForward)
        learningModule.MakeBatches(batch_size=32, train_test_split=.93)
        learningModule.Train(epochs=numberOfLearningPasses)
        learningModule.Predict(True)
        predDF = learningModule.GetTrainingResults(True, True)
    # Both branches score predictions the same way, so compute it once here.
    predDF['PercentageDeviation'] = abs(
        (predDF['Average'] - predDF['Average_Predicted']) /
        predDF['Average'])
    #Average of the last 25% to account for training.
    averageDeviation = predDF['PercentageDeviation'].tail(
        round(predDF.shape[0] / 4)).mean()
    print('Average deviation: ', averageDeviation * 100, '%')
    #Sort columns alphabetical
    predDF = predDF.reindex(sorted(predDF.columns), axis=1)
    predDF.to_csv(dataFolder + modelDescription + '.csv')
    plotColumns = ['Average', 'Average_Predicted']
    plot.PlotDataFrame(predDF[plotColumns], modelDescription, 'Date',
                       'Price', True, 'experiment/' + modelDescription)
    # Titles previously read '..._last160ays' / '..._last1000ays'; they now
    # match the file names.
    plot.PlotDataFrameDateRange(predDF[plotColumns], None, 160,
                                modelDescription + '_last160Days', 'Date',
                                'Price',
                                dataFolder + modelDescription + '_last160Days')
    plot.PlotDataFrameDateRange(
        predDF[plotColumns], None, 1000,
        modelDescription + '_last1000Days', 'Date', 'Price',
        dataFolder + modelDescription + '_last1000Days')
Пример #13
0
def TrainTickerRaw(ticker: str = '^SPX',
                   UseLSTM: bool = True,
                   prediction_target_days: int = 5,
                   epochs: int = 500,
                   usePercentages: bool = False,
                   hidden_layer_size: int = 512,
                   dropout: bool = True,
                   dropout_rate: float = 0.01,
                   learning_rate: float = 2e-5):
    """Train an LSTM or CNN model on one ticker's prices and export the results.

    Loads the history, trims it to 1/1/2000 - 3/1/2018, and either converts
    it to percentages or normalizes it. The model is then built, trained,
    run and saved. With ``usePercentages`` the predictions are rolled back
    into price levels, written to CSV and plotted here. Otherwise the model's
    own ``PredictionResultsSave`` / ``PredictionResultsPlot`` are used.

    Nothing runs when ``prices.LoadHistory(True)`` returns a falsy value.

    Args:
        ticker: Symbol whose price history is loaded through PricingData.
        UseLSTM: True trains on ['Average'] with window 1 and batch size 128.
            False (CNN) trains on High/Low/Open/Close with a window of
            ``16 * prediction_target_days`` and batch size 64.
        prediction_target_days: Days ahead handed to ``model.LoadTarget``.
        epochs: Epoch count handed to ``model.Train``.
        usePercentages: Train on percentage changes instead of normalized
            prices.
        hidden_layer_size: Forwarded to ``model.BuildModel``.
        dropout: Forwarded to ``model.BuildModel``.
        dropout_rate: Forwarded to ``model.BuildModel``.
        learning_rate: Forwarded to ``model.BuildModel``.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if not prices.LoadHistory(True):
        return
    prices.TrimToDateRange('1/1/2000', '3/1/2018')
    if usePercentages:
        # Author's note: percentages don't work well, presumably because
        # small errors have a huge impact when reverted back to the original
        # prices and rolled forward.
        prices.ConvertToPercentages()
    else:
        prices.NormalizePrices()
    prices.CalculateStats()
    model = StockPredictionNN(baseModelName=ticker, UseLSTM=UseLSTM)

    # The two model types differ only in these four settings; the training
    # pipeline below is shared.
    if UseLSTM:
        window_size = 1
        modelTag = '_LSTM'
        FieldList = ['Average']
        batch_size = 128
    else:  #CNN
        window_size = 16 * prediction_target_days
        modelTag = '_CNN'
        FieldList = ['High', 'Low', 'Open', 'Close']
        batch_size = 64
    modelDescription = ticker + modelTag
    modelDescription += '_epochs' + str(epochs) + '_histwin' + str(
        window_size) + '_daysforward' + str(prediction_target_days)
    if usePercentages:
        modelDescription += '_percentages'
    model.LoadSource(sourceDF=prices.GetPriceHistory(),
                     FieldList=FieldList,
                     window_size=window_size)
    model.LoadTarget(targetDF=None,
                     prediction_target_days=prediction_target_days)
    model.MakeBatches(batch_size=batch_size, train_test_split=.93)
    model.BuildModel(layer_count=1,
                     hidden_layer_size=hidden_layer_size,
                     dropout=dropout,
                     dropout_rate=dropout_rate,
                     learning_rate=learning_rate)
    model.DisplayModel()
    model.Train(epochs=epochs)
    model.Predict(True)
    model.Save()

    if not usePercentages:
        model.PredictionResultsSave(modelDescription, True, True)
        model.PredictionResultsPlot(modelDescription, True, False)
        return

    predDF = model.GetTrainingResults(True, True)
    predDF = predDF.loc[:, ['Average', 'Average_Predicted']]
    print('Unraveling percentages..')
    # Assign the result back: fillna(..., inplace=True) on a column selection
    # does not update predDF under pandas Copy-on-Write.
    predDF['Average_Predicted'] = predDF['Average_Predicted'].fillna(0)
    # Seed the first row, then compound each percentage onto the previous
    # row to rebuild price levels.
    predDF.iloc[0] = prices.CTPFactor['Average']
    for i in range(1, predDF.shape[0]):
        predDF.iloc[i] = (1 + predDF.iloc[i]) * predDF.iloc[i - 1]
    print(predDF)
    predDF['PercentageDeviation'] = abs(
        (predDF['Average'] - predDF['Average_Predicted']) /
        predDF['Average'])
    predDF.to_csv(dataFolder + modelDescription + '.csv')
    plotColumns = ['Average', 'Average_Predicted']
    plot.PlotDataFrame(predDF[plotColumns],
                       modelDescription, 'Date', 'Price', True,
                       dataFolder + modelDescription)
    # Title previously read '_last160ays'; it now matches the file name.
    plot.PlotDataFrameDateRange(
        predDF[plotColumns], None, 160,
        modelDescription + '_last160Days', 'Date', 'Price',
        dataFolder + modelDescription + '_last160Days')
    plot.PlotDataFrameDateRange(
        predDF[plotColumns], None, 500,
        modelDescription + '_last500Days', 'Date', 'Price',
        dataFolder + modelDescription + '_last500Days')