def testNN(net): stockHistory = StockHistory("nasdaq100") companies = stockHistory.compNames() featurizer = Featurizer(stockHistory) matches = 0 total = 0 for company in companies: trainingSet = TrainingSet(featurizer, company) for trainingExample in trainingSet: total += 1 features = trainingExample.features output = trainingExample.output nnOutput = [int(round(x)) for x in list(net.activate(features))] if nnOutput == output: matches += 1 print "Correct Percentage:", float(matches) / total, "%"
def trainNN(args): print "Begin training Neural Network" print print "Reading data" stockHistory = StockHistory("nasdaq100") companies = stockHistory.compNames() featurizer = Featurizer(stockHistory, companies[0]) nF = featurizer.numFeatures nO = featurizer.numTargetFeatures print print "Using", str(nF), "input features with", str(nO), "output features" print "Generating training set" ds = SupervisedDataSet(nF, nO) featureStats = [0 for i in range(nF)] outputStats = [0 for i in range(nO)] correlations = [[0 for i in range(nO)] for j in range(nF)] numTrainingExamples = 0 for company in companies: trainingSet = TrainingSet(stockHistory, company) for trainingExample in trainingSet: features = trainingExample.features output = trainingExample.output for i in range(nF): featureStats[i] += features[i] for j in range(nO): if output[j] == 1 and features[i] == output[j]: correlations[i][j] += 1 for i in range(nO): outputStats[i] += output[i] numTrainingExamples += 1 ds.addSample(features, output) print "Training set statistics:" print "Number of training examples:", numTrainingExamples print "Percent of examples with output:", [float(i) / numTrainingExamples for i in outputStats] print "i, Percent with feature, Correlation with output:" for i in range(len(correlations)): fs = float(featureStats[i]) / numTrainingExamples print i, "\t", fs, " \t", [float(c) / numTrainingExamples for c in correlations[i]] print print "Building network" netStructure = [nF, nF, nF, nO] net = buildNetwork(*netStructure, bias=True, hiddenclass=TanhLayer) trainer = BackpropTrainer(net, ds, verbose=True) print "Training" start = time() errorsPerEpoch = trainer.trainUntilConvergence(outFile=".".join(["nn"] + args + ["info"])) end = time() print "Training time:", (end - start) print errorsPerEpoch print for mod in net.modules: print "Module:", mod.name if mod.paramdim > 0: print "--parameters:", mod.params for conn in net.connections[mod]: print "-connection to", conn.outmod.name if conn.paramdim > 0: print "- parameters", conn.params if hasattr(net, "recurrentConns"): print "Recurrent connections" for conn in net.recurrentConns: print "-", conn.inmod.name, " to", conn.outmod.name if conn.paramdim > 0: print "- parameters", conn.params f = open(".".join(["nn"] + args + ["dat"]), "w") pickle.dump(net, f) f.close() print print "Graphing errors during training" try: plot(errorsPerEpoch) except: pass testNN(net) return net
def evalLinUCB(stockHistory=None, featurizer=None, dataSet='nasdaq100', graph=0, verbose=0, pickNum=5, money=10000.0, tradeCost=0.01, alpha=0.1): # sanitize input graph = int(graph) verbose = int(verbose) pickNum = float(pickNum) money = float(money) tradeCost= float(tradeCost) alpha = float(alpha) if stockHistory == None : stockHistory = StockHistory(dataSet) if featurizer == None : featurizer = Featurizer(stockHistory) startDate = stockHistory.startDate endDate = stockHistory.endDate currentDate = startDate testSizes = [0.1*i for i in range(1, 10)] testStartDates = [timedelta(days=int((endDate - startDate).days * (1 - testSize))) + startDate for testSize in testSizes] agents = [ LinUCBAgent(stockHistory, featurizer, tradeCost, pickNum, money, testStartDates, alpha), RandomAgent(tradeCost, pickNum, money, testStartDates), PrevBestAgent(tradeCost, pickNum, money, testStartDates) ] names = ['LinUCB', 'Random', 'PrevBest'] numAgents = len(agents) agentReturns = [[] for i in range(numAgents)] agentTestMoney = [[] for i in range(numAgents)] testingDays = [] print print 'Begin Training' while currentDate < endDate : sys.stdout.write("\r%s" %str(currentDate)) sys.stdout.flush() #if currentDate == testStartDate : # print # print 'Begin Testing' companies = [company for company in stockHistory.compNames() if featurizer.isValidDate(company, currentDate)] features = [featurizer.getFeatures(company, currentDate) for company in companies] returns = [stockHistory.getReturn(0, company, currentDate) for company in companies] if len(companies) > 0 : for agent, allReturns, allMoney in zip(agents, agentReturns, agentTestMoney) : chosenStocks, avgReturn, sharpeRatio = agent.select(companies, features, returns) allReturns.append(avgReturn) openPrices = {company: stockHistory.get(company, currentDate, OPEN) for company in companies} money = agent.updateMoney(currentDate, chosenStocks, openPrices, avgReturn) allMoney.append(money) #if currentDate > testStartDate: # testingDays.append(currentDate) # allMoney.append(money) # if verbose: # if agent == agents[0] : # print (' %10f\t%s\t%8.5f\t') % (money, chosenStocks, avgReturn), # else : print ('%8.5f\t') % avgReturn, # if agent == agents[numAgents - 1] : print currentDate = currentDate + timedelta(days=1) if graph : totalReturns = [[sum(returns[:i]) for i in range(len(returns))] for returns in agentReturns] plot(totalReturns, labels=names, ylabel='Total Return', xlabel='Time (Days)') for i in range(len(testSizes)) : yss = [] for testMoney in agentTestMoney : yss.append([m[i] for m in testMoney if m[i] != 10000]) plot(yss, labels=names, ylabel='Total Money', xlabel='Time (Days)') CAGRs = [agent.CAGRs(endDate) for agent in agents] #print 'Training days:', (testStartDate - startDate).days, 'Testing days:', (endDate - testStartDate).days, 'Total days:', (endDate - startDate).days for agent, name, cagr in zip(agents, names, CAGRs) : print name, '(money, CAGR):' print '\n'.join(str(s) for s in zip(testSizes, agent.moneyForTest, cagr)) print return CAGRs[0]