def trainAgent(trainDf, backTestDf, genPath, saveAgentDir, saveAgentName):
    startTime = datetime.now()
    print("Start train time: {}".format(startTime))

    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)

    trainEnv = CompositeEnv(trainDf, openerPriceDiffGenerator, buyerPriceDiffGenerator,
                            sellerPriceDiffGenerator, startDeposit=300, lotSize=0.1, lotCoef=100000,
                            spread=18, spreadCoef=0.00001, stoplossPuncts=100, takeprofitPuncts=300,
                            renderFlag=True, renderDir=saveAgentDir, renderName="trainDealsPlot")
    backTestEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator,
                               sellerPriceDiffGenerator, startDeposit=300, lotSize=0.1, lotCoef=100000,
                               spread=18, spreadCoef=0.00001, stoplossPuncts=100, takeprofitPuncts=300,
                               renderFlag=True, renderDir=saveAgentDir, renderName="backTestDealsPlot")

    # get size of state and action from environment
    openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                           memorySize=2500, batch_size=100, train_start=200, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=0.9997, learning_rate=0.0002)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                          memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                          epsilon=1, epsilon_decay=1.0, learning_rate=0.0002)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                           memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=1.0, learning_rate=0.0002)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)

    lastSaveEp = agent.fit_agent(env=trainEnv, backTestEnv=backTestEnv, nEpisodes=4, nWarmUp=0,
                                 uniformEps=False, synEps=True, plotScores=False, saveBest=True,
                                 saveFreq=1, saveDir=saveAgentDir, saveName=saveAgentName)
    endTime = datetime.now()
    print("Training finished. Total time: {}".format(endTime - startTime))
    return lastSaveEp
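# Hedged usage sketch (not part of the original source): the DataFrame split
# below is an assumption modeled on the tail/head splits used elsewhere in
# this file; hkFeatList and the generator pickle are assumed to exist already.
# trainDf = modDf.tail(9200).head(7000)
# backTestDf = modDf.tail(9200).tail(2200)
# lastSaveEp = trainAgent(trainDf, backTestDf, genPath="./MSDiffGen.pkl",
#                         saveAgentDir="./", saveAgentName="best_composite")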
def useAgent(symbol, timeframe, terminal, dataUpdater, dataManager, hkFeatList,
             saveDir, genPath, agentName, timeConstraint):
    ###############################
    # use agent on real-time data
    ###############################
    useCpu(nThreads=8, nCores=8)

    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    openerPriceDiffGenerator.setFitMode(False)
    buyerPriceDiffGenerator.setFitMode(False)
    sellerPriceDiffGenerator.setFitMode(False)

    testEnv = RealCompositeEnv(symbol, timeframe, terminal, dataUpdater, dataManager,
                               openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                               startDeposit=300, lotSize=0.1, lotCoef=100000,
                               stoplossPuncts=60, takeprofitPuncts=120, renderFlag=True)

    # get size of state and action from environment
    openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
    buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
    sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    agent = agent.load_agent(saveDir, agentName, dropSupportModel=True)
    # agent = agent.load_agent("./", "checkpoint_composite")

    print("start using agent")
    startTime = datetime.now()
    agent.use_agent(testEnv, timeConstraint=timeConstraint)
    endTime = datetime.now()
    print("Use time: {}".format(endTime - startTime))
    reset_keras()
def collectStatistics(testDf, startDeposit=300, lotSize=0.1, lotCoef=100000,
                      spread=18, spreadCoef=0.00001):
    openerPriceDiffGenerator = W2VDiffGenerator(featureList=priceFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
    buyerPriceDiffGenerator = W2VDiffGenerator(featureList=priceFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
    sellerPriceDiffGenerator = W2VDiffGenerator(featureList=priceFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
    openerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")
    buyerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")
    sellerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")
    openerPriceDiffGenerator.setFitMode(False)
    buyerPriceDiffGenerator.setFitMode(False)
    sellerPriceDiffGenerator.setFitMode(False)
    openerVolumeDiffGenerator.setFitMode(False)
    buyerVolumeDiffGenerator.setFitMode(False)
    sellerVolumeDiffGenerator.setFitMode(False)

    openerCompositeGenerator = W2VCompositeGenerator([openerPriceDiffGenerator, openerVolumeDiffGenerator],
                                                     flatStack=False)
    buyerCompositeGenerator = W2VCompositeGenerator([buyerPriceDiffGenerator, buyerVolumeDiffGenerator],
                                                    flatStack=False)
    sellerCompositeGenerator = W2VCompositeGenerator([sellerPriceDiffGenerator, sellerVolumeDiffGenerator],
                                                     flatStack=False)

    testEnv = CompositeEnv(testDf, openerCompositeGenerator, buyerCompositeGenerator, sellerCompositeGenerator,
                           startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                           spread=spread, spreadCoef=spreadCoef, renderFlag=True)
    openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
    buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
    sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    agent = agent.load_agent("./", "best_composite")

    print("start using agent")
    dealsStatistics = agent.use_agent(testEnv)
    return dealsStatistics
def useAgent(preprocDf, hkFeatList, saveDir, genPath, agentName, timeConstraint):
    ###############################
    # use agent on test
    ###############################
    useCpu(nThreads=8, nCores=8)
    testDf = preprocDf.tail(232)

    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    #openerPriceDiffGenerator.setFitMode(False)
    #buyerPriceDiffGenerator.setFitMode(False)
    #sellerPriceDiffGenerator.setFitMode(False)

    testEnv = CompositeEnv(testDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                           startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                           renderFlag=True, renderDir=saveDir, renderName="testDealsPlot")

    # get size of state and action from environment
    openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
    buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
    sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    agent = agent.load_agent(saveDir, agentName, dropSupportModel=True)
    # agent = agent.load_agent("./", "checkpoint_composite")

    print("start using agent")
    startTime = datetime.now()
    dealsStatistics = agent.use_agent(testEnv, timeConstraint=timeConstraint)
    endTime = datetime.now()
    print("Use time: {}".format(endTime - startTime))
    reset_keras()

    # plot cumulative reward over the collected deals
    sumRew = 0
    cumulativeReward = []
    for deal in dealsStatistics:
        sumRew += deal
        cumulativeReward.append(sumRew)
    plt.plot(range(len(cumulativeReward)), cumulativeReward)
    plt.show()
def evaluateAgent(backTestDf, genPath, agentDir, agentName):
    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)

    testEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                           startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                           stoplossPuncts=100, takeprofitPuncts=300, renderFlag=True,
                           renderDir=agentDir, renderName="testDealsPlot")

    # get size of state and action from environment
    openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
    buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
    sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    agent = agent.load_agent(agentDir, agentName)
    # agent = agent.load_agent("./", "checkpoint_composite")

    print("start using agent")
    dealsStatistics = agent.use_agent(testEnv)
    sumRew = sum(dealsStatistics)
    return sumRew
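# Hedged sketch (assumption, not in the original source): evaluateAgent returns
# the total back-test reward, so the periodic checkpoint and the best-scoring
# save can be compared directly; the save names mirror those used in this file.
# rewards = {name: evaluateAgent(backTestDf, genPath, "./", name)
#            for name in ("best_composite", "checkpoint_composite")}
# bestName = max(rewards, key=rewards.get)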
def collectStatistics(testDf, startDeposit=300, lotSize=0.1, lotCoef=100000,
                      spread=18, spreadCoef=0.00001):
    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    #openerPriceDiffGenerator.setFitMode(False)
    #buyerPriceDiffGenerator.setFitMode(False)
    #sellerPriceDiffGenerator.setFitMode(False)

    testEnv = CompositeEnv(testDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                           startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                           spread=spread, spreadCoef=spreadCoef, renderFlag=True)
    openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
    buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
    sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    #agent = agent.load_agent("./", "best_composite")
    agent = agent.load_agent("./", "checkpoint_composite")

    print("start using agent")
    dealsStatistics = agent.use_agent(testEnv)
    return dealsStatistics
def trainAgent(trainDf, startDeposit=300, lotSize=0.1, lotCoef=100000,
               spread=18, spreadCoef=0.00001):
    openerPriceDiffGenerator = W2VDiffGenerator(featureList=priceFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
    buyerPriceDiffGenerator = W2VDiffGenerator(featureList=priceFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
    sellerPriceDiffGenerator = W2VDiffGenerator(featureList=priceFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
    openerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")
    buyerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")
    sellerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")

    openerCompositeGenerator = W2VCompositeGenerator([openerPriceDiffGenerator, openerVolumeDiffGenerator],
                                                     flatStack=False)
    buyerCompositeGenerator = W2VCompositeGenerator([buyerPriceDiffGenerator, buyerVolumeDiffGenerator],
                                                    flatStack=False)
    sellerCompositeGenerator = W2VCompositeGenerator([sellerPriceDiffGenerator, sellerVolumeDiffGenerator],
                                                     flatStack=False)

    trainEnv = CompositeEnv(trainDf, openerCompositeGenerator, buyerCompositeGenerator, sellerCompositeGenerator,
                            startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                            spread=spread, spreadCoef=spreadCoef, renderFlag=True)
    openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                           memorySize=500, batch_size=100, train_start=200, epsilon_min=0.1,
                           epsilon=1, epsilon_decay=0.9994)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                          memorySize=1000, batch_size=200, train_start=300, epsilon_min=0.1,
                          epsilon=1, epsilon_decay=0.9999)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                           memorySize=1000, batch_size=200, train_start=300, epsilon_min=0.1,
                           epsilon=1, epsilon_decay=0.9999)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    agent.fit_agent(env=trainEnv, nEpisodes=15, nWarmUp=0, uniformEps=False,
                    plotScores=True, saveBest=True, saveFreq=2)
def evaluateAgent(backTestDf):
    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")

    testEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                           startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                           renderFlag=True)

    # get size of state and action from environment
    openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
    buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
    sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    agent = agent.load_agent("./", "best_composite")
    # agent = agent.load_agent("./", "checkpoint_composite")

    print("start using agent")
    dealsStatistics = agent.use_agent(testEnv)
    sumRew = sum(dealsStatistics)
    return sumRew
def collectStatistics(symbol, nExperiment, trainDf, backTestDf, testDf, startDeposit=300,
                      lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001):
    reset_keras()
    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")

    trainEnv = CompositeEnv(trainDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                            startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                            spread=spread, spreadCoef=spreadCoef, renderFlag=True)
    backTestEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator,
                               sellerPriceDiffGenerator, startDeposit=startDeposit, lotSize=lotSize,
                               lotCoef=lotCoef, spread=spread, spreadCoef=spreadCoef, renderFlag=True)
    forwardTestEnv = CompositeEnv(testDf, openerPriceDiffGenerator, buyerPriceDiffGenerator,
                                  sellerPriceDiffGenerator, startDeposit=startDeposit, lotSize=lotSize,
                                  lotCoef=lotCoef, spread=spread, spreadCoef=spreadCoef, renderFlag=True)

    # get size of state and action from environment
    openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                           memorySize=2500, batch_size=100, train_start=200, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=0.9995, learning_rate=0.0005)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                          memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                          epsilon=1, epsilon_decay=1.0, learning_rate=0.0005)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                           memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=1.0, learning_rate=0.0005)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)

    # train one episode at a time, collect deal statistics on all three
    # environments after each epoch, and persist the results as we go
    continueFit = False
    trainDealsStatistics = []
    testDealsStatistics = []
    backDealsStatistics = []
    for i in range(20):
        agent.fit_agent(env=trainEnv, backTestEnv=None, nEpisodes=1, nWarmUp=0, uniformEps=False,
                        synEps=True, plotScores=True, saveBest=False, saveFreq=111, continueFit=continueFit)
        continueFit = True
        trainDealsStatistics.append(agent.use_agent(trainEnv))
        testDealsStatistics.append(agent.use_agent(forwardTestEnv))
        backDealsStatistics.append(agent.use_agent(backTestEnv))
        with open("./trainDealsStatistics_{}_{}.pkl".format(symbol, nExperiment), mode="wb") as dealsFile:
            joblib.dump(trainDealsStatistics, dealsFile)
        with open("./testDealsStatistics_{}_{}.pkl".format(symbol, nExperiment), mode="wb") as dealsFile:
            joblib.dump(testDealsStatistics, dealsFile)
        with open("./backDealsStatistics_{}_{}.pkl".format(symbol, nExperiment), mode="wb") as dealsFile:
            joblib.dump(backDealsStatistics, dealsFile)
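# Hedged sketch (assumption, not in the original source): reload the dumped
# statistics with joblib and plot cumulative reward per training epoch,
# mirroring the running-sum pattern used in useAgent above.
# with open("./testDealsStatistics_{}_{}.pkl".format(symbol, nExperiment), mode="rb") as dealsFile:
#     testDealsStatistics = joblib.load(dealsFile)
# for epoch, deals in enumerate(testDealsStatistics):
#     cumulative, sumRew = [], 0
#     for rew in deals:
#         sumRew += rew
#         cumulative.append(sumRew)
#     plt.plot(cumulative, label="epoch {}".format(epoch))
# plt.legend()
# plt.show()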
trainEnv = CompositeEnv(trainDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                        startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                        renderFlag=True)
backTestDf = modDf.tail(9200).head(2200)
backTestEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                           startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                           renderFlag=True)

# get size of state and action from environment
openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                       memorySize=2500, batch_size=100, train_start=200, epsilon_min=0.05,
                       epsilon=1, epsilon_decay=0.9999, learning_rate=0.0001)
buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                      memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                      epsilon=1, epsilon_decay=1.0, learning_rate=0.0001)
sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                       memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                       epsilon=1, epsilon_decay=1.0, learning_rate=0.0001)
agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
#agent = agent.load_agent("./", "best_composite")
#agent = agent.load_agent("./", "checkpoint_composite")
lastSaveEp = agent.fit_agent(env=trainEnv, backTestEnv=backTestEnv, nEpisodes=25, nWarmUp=0,
                             uniformEps=False, synEps=True, plotScores=True, saveBest=True, saveFreq=1)
endTime = datetime.now()
print("Training finished. Total time: {}".format(endTime - startTime))

###############################
# use agent
###############################
testDf = modDf.tail(7200).tail(2200)
#testDf = modDf.tail(6032).head(5032)
#testDf = modDf.tail(6032).tail(1032)
openerCompositeGenerator = W2VCompositeGenerator([openerEnergyScalerGenerator, openerEnergyDiffGenerator],
                                                 flatStack=False)
buyerCompositeGenerator = W2VCompositeGenerator([buyerEnergyScalerGenerator, buyerEnergyDiffGenerator],
                                                flatStack=False)
sellerCompositeGenerator = W2VCompositeGenerator([sellerEnergyScalerGenerator, sellerEnergyDiffGenerator],
                                                 flatStack=False)

testEnv = CompositeEnv(testDf, openerCompositeGenerator, buyerCompositeGenerator, sellerCompositeGenerator,
                       startDeposit=300, lotSize=0.01, lotCoef=100000, renderFlag=True)

# get size of state and action from environment
openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
agent = agent.load_agent("./", "best_composite")

print("start using agent")
agent.use_agent(testEnv)

"""realEnv = RealEnv(symbol=symbol, timeframe=timeframe, terminal=terminal,
                  dataUpdater=dataUpdater, dataManager=dataManager,
                  featureFactory=featureFactory, obsFeatList=obsFeatList)
agent.use_agent(realEnv)"""
                       batch_size=1000, train_start=1200, epsilon_decay=0.999)
buyerAgent = DQNAgent(trainEnv.observation_space["buyer"][0], trainEnv.action_space["buyer"].n, memorySize=2000,
                      batch_size=1000, train_start=1200, epsilon_decay=0.999)
sellerAgent = DQNAgent(trainEnv.observation_space["seller"][0], trainEnv.action_space["seller"].n, memorySize=2000,
                       batch_size=1000, train_start=1200, epsilon_decay=0.999)
agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
agent.fit_agent(env=trainEnv, nEpisodes=10000, plotScores=True, saveFreq=10)  # TODO: add save best only
agent.save_agent("./", "test_composite_agent")
print("agent saved")

###############################
# use agent
###############################
"""testDF = df[int(len(df)*0.9):]
featureFactory = ScalerGenerator(featureList=obsFeatList, fitOnStep=True)
featureFactory.globalFit(df[:int(len(df)*0.9)])
testEnv = FeatureGenEnv(testDF, featureFactory, startDeposit=10, lotSize=0.01, lotCoef=10, renderFlag=True)
state_size = testEnv.observation_space.shape[0]
action_size = testEnv.action_space.n
print("loading agent")
openerPriceDiffGenerator.setFitMode(False)
buyerPriceDiffGenerator.setFitMode(False)
sellerPriceDiffGenerator.setFitMode(False)

testEnv = RealCompositeEnv(symbol, timeframe, terminal, dataUpdater, dataManager,
                           openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                           startDeposit=300, lotSize=0.1, lotCoef=100000, renderFlag=True)

# get size of state and action from environment
openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
agent = agent.load_agent("./", "best_composite", dropSupportModel=True)
#agent = agent.load_agent("./", "checkpoint_composite")

print("start using agent")
agent.use_agent(testEnv)
def trainAgent(trainDf, backTestDf):
    startTime = datetime.now()

    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")

    trainEnv = CompositeEnv(trainDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                            startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                            renderFlag=True)
    backTestEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator,
                               sellerPriceDiffGenerator, startDeposit=300, lotSize=0.1, lotCoef=100000,
                               spread=18, spreadCoef=0.00001, renderFlag=True)

    # get size of state and action from environment
    openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                           memorySize=2500, batch_size=100, train_start=200, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=0.9994, learning_rate=0.001)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                          memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                          epsilon=1, epsilon_decay=0.9999, learning_rate=0.001)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                           memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=0.9999, learning_rate=0.001)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    # agent = agent.load_agent("./", "best_composite")
    # agent = agent.load_agent("./", "checkpoint_composite")

    lastSaveEp = agent.fit_agent(env=trainEnv, backTestEnv=backTestEnv, nEpisodes=5, nWarmUp=0,
                                 uniformEps=False, synEps=True, plotScores=True, saveBest=True, saveFreq=1)
    endTime = datetime.now()
    print("Training finished. Total time: {}".format(endTime - startTime))
    return lastSaveEp
                       batch_size=100, train_start=200, epsilon_decay=0.999)
buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n, memorySize=2000,
                      batch_size=200, train_start=300, epsilon_decay=0.999)
sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n, memorySize=2000,
                       batch_size=200, train_start=300, epsilon_decay=0.999)
agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
agent.fit_agent(env=trainEnv, nEpisodes=30, plotScores=True, saveBest=True, saveFreq=2)

###############################
# use agent
###############################
testDF = df[int(len(df) * 0.6):]
testEnv = CompositeEnv(testDF, openerFeatureFactory, buyerFeatureFactory, sellerFatureFactory,
                       startDeposit=10,
def trainAgent(trainDf, backTestDf, startDeposit=300, lotSize=0.1, lotCoef=100000,
               spread=18, spreadCoef=0.00001):
    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")

    trainEnv = CompositeEnv(trainDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                            startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                            spread=spread, spreadCoef=spreadCoef, renderFlag=True)
    backTestEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator,
                               sellerPriceDiffGenerator, startDeposit=startDeposit, lotSize=lotSize,
                               lotCoef=lotCoef, spread=spread, spreadCoef=spreadCoef, renderFlag=True)

    # get size of state and action from environment
    openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                           memorySize=2500, batch_size=100, train_start=200, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=0.9999, learning_rate=0.0002)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                          memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                          epsilon=1, epsilon_decay=1.0, learning_rate=0.0002)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                           memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=1.0, learning_rate=0.0002)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)

    lastSaveEp = agent.fit_agent(env=trainEnv, backTestEnv=backTestEnv, nEpisodes=3, nWarmUp=0,
                                 uniformEps=False, synEps=True, plotScores=True, saveBest=False, saveFreq=1)
    return lastSaveEp
buyerCompositeGenerator = W2VCompositeGenerator([buyerPriceDiffGenerator, buyerVolumeDiffGenerator],
                                                flatStack=False)
sellerCompositeGenerator = W2VCompositeGenerator([sellerPriceDiffGenerator, sellerVolumeDiffGenerator],
                                                 flatStack=False)

#trainDf = df[:int(len(df)*0.9)]
trainDf = df.tail(2044).head(1022)
#trainDf = trainDf[:int(len(trainDf)*0.5)]
trainEnv = CompositeEnv(trainDf, openerCompositeGenerator, buyerCompositeGenerator, sellerCompositeGenerator,
                        startDeposit=300, lotSize=0.1, lotCoef=100000, renderFlag=True)

# get size of state and action from environment
openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                       memorySize=500, batch_size=100, train_start=200, epsilon_min=0.05,
                       epsilon=1, epsilon_decay=0.9994)
buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                      memorySize=1000, batch_size=200, train_start=300, epsilon_min=0.05,
                      epsilon=1, epsilon_decay=0.9999)
sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                       memorySize=1000, batch_size=200, train_start=300, epsilon_min=0.05,
                       epsilon=1, epsilon_decay=0.9999)
agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
agent.fit_agent(env=trainEnv, nEpisodes=15, nWarmUp=0, uniformEps=False,
                plotScores=True, saveBest=True, saveFreq=2)

###############################
# use agent
###############################
#testDf = df.copy()#[int(len(df)*0.9):]
testDf = df.tail(2044).tail(1022)  # check scaling at test env
#testDf = testDf[int(len(testDf)*0.5):]
openerPriceDiffGenerator = W2VDiffGenerator(featureList=hkFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
buyerPriceDiffGenerator = W2VDiffGenerator(featureList=hkFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
sellerPriceDiffGenerator = W2VDiffGenerator(featureList=hkFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
openerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")
buyerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")