def runModel():
    """Train a single A2C agent against three dummy random agents on Chef's Hat.

    Runs one training session of ``numGames`` games through
    ``ChefsHatExperimentHandler.runExperiment`` and prints the resulting
    metrics. All agents start from scratch (no saved models are loaded).
    """
    # Table line-up: the learner sits at seat 1, dummy random agents fill the rest.
    learner = AgentA2C.AgentA2C([True, 1.0])  # training agent
    dummies = [AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM) for _ in range(3)]
    playersAgents = [learner] + dummies

    # Reward function: only winning is rewarded.
    reward = RewardOnlyWinning.RewardOnlyWinning()

    numGames = 1000  # amount of training games
    experimentDescriptor = "Training_NewPlot"

    # Pre-trained DQL actor checkpoint (kept for reference; not loaded below).
    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Player_4_Cards_11_games_3000TrainingAgents_['DQL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-25_18:15:32.076987/Model/actor_iteration_2999_Player_0.hd5"

    # Empty string means "start this seat from scratch" (nothing to load).
    loadModel = ["", "", "", ""]

    # Experiment-control switches.
    isLogging = False     # write a textual log of the experiment
    isPlotting = True     # generate plots
    plotFrequency = 1000  # plot the plots every X games
    createDataset = True  # whether to save the dataset
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/NewQPlot/"  # output directory

    metrics = ChefsHatExperimentHandler.runExperiment(
        numGames=numGames,
        playersAgents=playersAgents,
        experimentDescriptor=experimentDescriptor,
        isLogging=isLogging,
        isPlotting=isPlotting,
        plotFrequency=plotFrequency,
        createDataset=createDataset,
        saveExperimentsIn=saveExperimentsIn,
        loadModel=loadModel,
        rewardFunction=reward,
    )
    print("Metrics:" + str(metrics))
def runModel():
    """Set up evaluation agents and plot selection.

    NOTE(review): this definition appears truncated in the extracted source —
    only the plot selection and agent construction are visible here.
    """
    # Full menu of experiment plots that could be generated...
    plotsToGenerate = [plots["Experiment_Rounds"], plots["Experiment_FinishingPosition"],
                       plots["Experiment_ActionsBehavior"], plots["Experiment_Reward"],
                       plots["Experiment_QValues"], plots["Experiment_Mood"],
                       plots["Experiment_MoodNeurons"], plots["Experiment_SelfProbabilitySuccess"]]
    # ...immediately overridden: with an empty list, no plots are generated.
    plotsToGenerate = []

    # Agents for this run. The first list element presumably is a training
    # flag (False here) and the second an exploration value — TODO confirm
    # against the agent constructors.
    agent1 = AgentDQL.AgentDQL([False, 1.0, "DQL"])  # DQL agent
    agent2 = AgentPPO.AgentPPO([False, 1.0, "PPO"])  # PPO agent
    agent3 = AgentA2C.AgentA2C([False, 1.0, "A2C"])  # A2C agent
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)  # random baseline agent
def runModel():
    """Train DQL, A2C and PPO agents (plus one random agent) from pre-trained checkpoints."""
    # Parameters for the game: three learning agents and one random baseline.
    agent1 = AgentDQL.AgentDQL([True, 1.0])  # training agent
    agent2 = AgentA2C.AgentA2C([True, 1.0])  # training agent
    agent3 = AgentPPO.AgentPPO([True, 1.0])  # training agent
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)  # random baseline
    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    # Reward function: only winning is rewarded.
    reward = RewardOnlyWinning.RewardOnlyWinning()

    numGames = 1000  # amount of training games
    experimentDescriptor = "Training"

    # Pre-trained checkpoints used to warm-start the learning agents.
    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0_2020-03-26_17:26:18.198600/Model/actor_iteration_999_Player_2.hd5"
    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch - Second Best - Best - _2020-03-26_19:11:39.863435/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch - Second Best - Best - _2020-03-26_19:11:39.863435/Model/critic_iteration_999_Player_0.hd5"
    # NOTE(review): the PPO checkpoint directories carry "A2C" in their names —
    # confirm these really are PPO weights.
    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch - Scratch - Scratch - _2020-03-26_22:27:02.430568/Model/actor_iteration_999_Player_3.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch - Scratch - Scratch - _2020-03-26_22:27:02.430568/Model/critic_iteration_999_Player_3.hd5"

    loadModelAgent1 = DQLModel               # DQL loads a single actor file
    loadModelAgent2 = [A2cActor, A2cCritic]  # A2C loads an actor/critic pair
    loadModelAgent3 = [PPOActor, PPOCritic]  # PPO loads an actor/critic pair
    loadModelAgent4 = ""                     # random agent: nothing to load

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # indicate where the saved model is

    # Parameters for controlling the experiment.
    isLogging = False  # log the experiment
    isPlotting = True  # plot the experiment
    plotFrequency = 1000  # plot the plots every X games
    createDataset = True  # whether to save the dataset
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/QValuePlot"  # Directory where the experiment will be saved

    metrics = ChefsHatExperimentHandler.runExperiment(
        numGames=numGames,
        playersAgents=playersAgents,
        experimentDescriptor=experimentDescriptor,
        isLogging=isLogging,
        isPlotting=isPlotting,
        plotFrequency=plotFrequency,
        createDataset=createDataset,
        saveExperimentsIn=saveExperimentsIn,
        loadModel=loadModel,
        rewardFunction=reward)
    print("Metrics:" + str(metrics))
def runModel():
    """Evaluate four previously trained A2C checkpoints against each other.

    Performs ``numRuns`` evaluation runs of ``numGames`` games each (no
    training), accumulates each player's win metric, and plots the totals
    via ``plotVictoriesTotal``.
    """
    # Parameters for the game: four evaluation-mode A2C agents.
    agent1 = AgentA2C.AgentA2C([False, 1.0])  # evaluation agent
    agent2 = AgentA2C.AgentA2C([False, 1.0])
    agent3 = AgentA2C.AgentA2C([False, 1.0])
    agent4 = AgentA2C.AgentA2C([False, 1.0])
    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 10  # amount of runs
    numGames = 100  # amount of games per run
    experimentDescriptor = "Testing_NewPlot"

    # Checkpoints from different self-play experiment rounds (suffix = round number).
    A2cActor_1 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch - Second Best - Best - _2020-03-26_19:11:39.863435/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic_1 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch - Second Best - Best - _2020-03-26_19:11:39.863435/Model/critic_iteration_999_Player_0.hd5"
    A2cActor_4 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_4_Training_Best_Agent_1Choice_ Best - Scratch - Scratch - _2020-03-26_19:27:58.464895/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic_4 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_4_Training_Best_Agent_1Choice_ Best - Scratch - Scratch - _2020-03-26_19:27:58.464895/Model/critic_iteration_999_Player_0.hd5"
    # NOTE(review): despite the A2C names, the "_9" paths point at DQL
    # checkpoints, and A2cCritic_9 references an *actor* file — confirm
    # these are the intended weights.
    A2cActor_9 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_1_Training_Best_Agent_2_2020-03-26_17:33:51.517296/Model/actor_iteration_999_Player_1.hd5"
    A2cCritic_9 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_2_Training_Best_Agent_1_2020-03-26_17:42:24.306637/Model/actor_iteration_999_Player_1.hd5"
    A2cActor_r = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic_r = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/critic_iteration_999_Player_0.hd5"

    loadModelAgent1 = [
        A2cActor_1, A2cCritic_1
    ]  # actor/critic pair for the round-0 checkpoint
    loadModelAgent2 = [A2cActor_4, A2cCritic_4]  # round-4 checkpoint
    loadModelAgent3 = [A2cActor_9, A2cCritic_9]  # "round-9" checkpoint (see note above)
    loadModelAgent4 = [A2cActor_r, A2cCritic_r]  # trained-vs-random checkpoint

    # Seat order is deliberately scrambled here (4, 3, 1, 2) — the seat a
    # model is loaded into does not follow the variable numbering.
    loadModel = [
        loadModelAgent4, loadModelAgent3, loadModelAgent1, loadModelAgent2
    ]  # indicate where the saved model is

    # Parameters for controlling the experiment.
    isLogging = False  # log the experiment
    isPlotting = True  # plot the experiment
    plotFrequency = 1000  # plot the plots every X games
    createDataset = True  # whether to save the dataset
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/"  # Directory where the experiment will be saved

    # One win-metric list per seat, one entry per run.
    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []
    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)
        # Per-player metric tuples start at index 2; element 0 is the win count.
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]
        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])
        print("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
def runModel():
    """Iterated A2C self-play tournament.

    Runs ``numExperiments`` rounds. Each round trains four A2C agents whose
    starting weights are sampled from previously discovered best/second-best
    agents (or start from scratch), then evaluates the four trained models
    without learning and records the best and second-best performer to seed
    the next round. Prints the last evaluation metrics at the end.

    Fix: the fourth evaluation agent was built with ``[False, 0, 1]`` —
    a typo (comma instead of decimal point) for ``[False, 0.1]``, which is
    what the other three evaluation agents use.
    """
    # Reward function shared by all games: only winning is rewarded.
    reward = RewardOnlyWinning.RewardOnlyWinning()

    numExperiments = 10  # rounds of train-then-evaluate; winners carry over
    experimentDescriptor = "Training"

    # Parameters for controlling the experiment.
    isLogging = False  # log the experiment
    isPlotting = True  # plot the experiment
    createDataset = False  # whether to save the dataset
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/"  # output directory

    bestAgent = 0  # seat index (0..3) of the current best agent
    description = experimentDescriptor
    epsilon = 1.0  # exploration value; decayed by 0.7 per round, floored at 0.1
    bestAgentsList = []  # model paths of round winners
    secondBestList = []  # model paths of round runners-up
    lastBestAgent = ""  # model of the most recent winner; always seated last

    metrics = None  # last evaluation metrics (printed after the loop)
    for i in range(numExperiments):
        # Choose starting weights for the first three seats at random:
        # ~1/3 best-so-far, ~1/3 second-best, ~1/3 from scratch.
        agents = []
        agentsChoice = ""
        for agentNumber in range(3):
            probNumber = numpy.random.rand()
            if probNumber <= 0.33:
                # Pull a random entry from the best-agents list (empty string
                # = scratch while the list is still empty).
                if len(bestAgentsList) == 0:
                    agents.append("")
                else:
                    random.shuffle(bestAgentsList)
                    agents.append(bestAgentsList[0])
                agentsChoice = agentsChoice + " Best - "
            elif probNumber > 0.33 and probNumber <= 0.66:
                # Pull a random entry from the second-best list.
                if len(secondBestList) == 0:
                    agents.append("")
                else:
                    random.shuffle(secondBestList)
                    agents.append(secondBestList[0])
                agentsChoice = agentsChoice + " Second Best - "
            else:
                # Start a new agent from scratch.
                agents.append("")
                agentsChoice = agentsChoice + " Scratch - "
        # The last seat always continues from the previous round's winner.
        agents.append(lastBestAgent)
        loadModel = agents

        # Fresh training agents with the current exploration value.
        agent1 = AgentA2C.AgentA2C([True, epsilon])
        agent2 = AgentA2C.AgentA2C([True, epsilon])
        agent3 = AgentA2C.AgentA2C([True, epsilon])
        agent4 = AgentA2C.AgentA2C([True, epsilon])
        epsilon = epsilon * 0.7
        if epsilon < 0.1:
            epsilon = 0.1
        playersAgents = [agent1, agent2, agent3, agent4]

        numGames = 3000  # training games per round
        plotFrequency = 1000  # plot the plots every X games

        print("Choices: " + str(agentsChoice))
        print("Best agent: " + str(bestAgent) + " - Loading:" + str(loadModel))

        experimentDescriptor = description + "_GameExperimentNumber_" + str(
            i) + "_Training_Best_Agent_" + str(bestAgent) + "Choice_" + str(
                agentsChoice)

        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Collect the freshly trained model paths (element 4 of each
        # player's metric tuple; per-player tuples start at metrics[2]).
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]
        loadModel = [p1[4], p2[4], p3[4], p4[4]]

        # Evaluation agents: no training, small residual exploration.
        agent1 = AgentA2C.AgentA2C([False, 0.1])
        agent2 = AgentA2C.AgentA2C([False, 0.1])
        agent3 = AgentA2C.AgentA2C([False, 0.1])
        agent4 = AgentA2C.AgentA2C([False, 0.1])  # fixed: was [False, 0, 1]
        playersAgents = [agent1, agent2, agent3, agent4]

        print("Testing - loading: " + str(loadModel))
        experimentDescriptor = description + "_GameExperimentNumber_" + str(
            i) + "_Test"
        numGames = 100  # evaluation games per round
        plotFrequency = 100  # plot the plots every X games
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Average reward per player (element 2 of each player's metrics).
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]
        wins = (numpy.average(p1[2]), numpy.average(p2[2]),
                numpy.average(p3[2]), numpy.average(p4[2]))

        # Pick best and second-best performers by average reward.
        bestAgent = 0
        secondBestAgent = 0
        bestWin = -5000
        secondBestWin = -5000
        for a in range(4):
            if wins[a] >= bestWin:
                bestWin = wins[a]
                bestAgent = a
            if wins[a] >= secondBestWin and wins[a] < bestWin:
                secondBestWin = wins[a]
                secondBestAgent = a

        bestAgentsList.append(loadModel[bestAgent])
        lastBestAgent = loadModel[bestAgent]
        secondBestList.append(loadModel[secondBestAgent])
        print("Best Agent: " + str(bestAgent))
        print("Rewards: " + str(wins))

    print("Metrics:" + str(metrics))
import cv2 import numpy #Experiment control variables dataSetLocation = "dataset.pkl" #location of the dataset.PKL file saveMoodDataset = "" #Location where the Mood dataset will be saved saveMoodPlot = "" #Location where the Mood Plots will be saved gameToGenerateMood = 0 # Game from which to generate the mood. #Agents agent1 = AgentDQL.AgentDQL([False, 1.0, "DQL"]) #training agent agent2 = AgentPPO.AgentPPO([False, 1.0, "PPO"]) #training agent agent3 = AgentA2C.AgentA2C([False, 1.0, "A2C"]) # training agent agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM) agents = [agent1,agent2,agent3,agent4] DQLModel = "dql.dh5" # Location of the trained DQL model A2cActor = "a2cActor.dh5" # Location of the trained A2C Actor model A2cCritic = "a2cCritic.dh5" # Location of the trained A2C Critic model PPOActor = "ppoActor.dh5" # Location of the trained PPO Actor model PPOCritic = "ppoCritic.dh5" # Location of the trained PPO Critic model loadModelAgent1 = DQLModel loadModelAgent2 = [PPOActor, PPOCritic] loadModelAgent3 = [A2cActor,
def runModel():
    """Evaluate one trained A2C agent against three dummy random agents.

    Performs ``numRuns`` evaluation runs of ``numGames`` games each (no
    training) and plots total victories via ``plotVictoriesTotal``.
    """
    # Parameters for the game: one evaluation-mode A2C agent vs randoms.
    agent1 = AgentA2C.AgentA2C([False, 1.0])  # evaluated agent
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 1  # amount of runs
    numGames = 100  # amount of games per run
    experimentDescriptor = "Testing_NewPlot_Stack"

    # Checkpoint of the A2C agent trained against random opponents.
    A2CActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/actor_iteration_999_Player_0.hd5"
    A2CCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/critic_iteration_999_Player_0.hd5"

    loadModelAgent1 = [
        A2CActor, A2CCritic
    ]  # actor/critic pair for the evaluated agent
    loadModelAgent2 = ""  # random agents have nothing to load
    loadModelAgent3 = ""
    loadModelAgent4 = ""

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # indicate where the saved model is

    # Parameters for controlling the experiment.
    isLogging = False  # log the experiment
    isPlotting = False  # plot the experiment
    plotFrequency = 1  # plot the plots every X games
    createDataset = True  # whether to save the dataset
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/AllQValues"  # Directory where the experiment will be saved

    # One win-metric list per seat, one entry per run.
    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []
    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)
        # Per-player metric tuples start at index 2; element 0 is the win count.
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]
        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])
        # print ("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
def runModel():
    """Baseline evaluation: DQL, A2C, PPO and AIRL each vs three random agents.

    For each of the four candidate agents, runs ``numberOfTrials`` trials up
    to ``maximumScore`` and appends one summary row (average points, won
    games, won rounds per player) to a pandas DataFrame that is saved as
    pickle and CSV after every evaluated agent.
    """
    # Plots: empty list = no plots generated.
    plotsToGenerate = []

    # Expert demonstrations required by the AIRL agent.
    demonstrations = numpy.load(
        "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/ExpertObs/Demonstrations_ExpertCollection.npy",
        allow_pickle=True)

    # Candidate agents (evaluation mode), one of which sits at seat 1 per outer loop.
    agentDQL = AgentDQL.AgentDQL([False, 1.0, "DQL"])
    agentA2C = AgentA2C.AgentA2C([False, 1.0, "A2C"])
    agentPPO = AgentPPO.AgentPPO([False, 1.0, "PPO"])
    agentAIRL = AIRL.AgentAIRL([False, 1.0, "AIRL", None, demonstrations])
    possibleAgent1 = [agentDQL, agentA2C, agentPPO, agentAIRL]
    # Fixed opponents: three dummy random agents.
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # Trained model checkpoints, index-aligned with possibleAgent1.
    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/DQL_vsRandom/actor_iteration_999_Player_0.hd5"
    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/A2C_vsEveryone/actor_iteration_999_Player_1.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/A2C_vsEveryone/critic_iteration_999_Player_1.hd5"
    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/PPO_vsEveryone/actor_iteration_999_Player_2.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/PPO_vsEveryone/critic_iteration_999_Player_2.hd5"
    AIRLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/Training/Player_4_Cards_11_games_5000TrainingAgents_['DQL_AIRL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Train_NegativeReward_notInverted_2020-04-05_16:31:22.781799/Model/actor_iteration_4999_Player_0.hd5"
    AIRLReward = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/Training/Player_4_Cards_11_games_5000TrainingAgents_['DQL_AIRL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Train_NegativeReward_notInverted_2020-04-05_16:31:22.781799/Model/reward_iteration_4999_Player_0.hd5"
    possibleLoadModel1 = [DQLModel, [A2cActor, A2cCritic],
                          [PPOActor, PPOCritic], [AIRLModel, AIRLReward]]
    loadModelEmpty = ""  # random agents load nothing

    # Reward function: only winning is rewarded.
    reward = RewardOnlyWinning.RewardOnlyWinning()

    # Experimental parameters.
    numberOfTrials = 50
    maximumScore = 15  # maximumScore to be reached
    experimentDescriptor = "BaselineExperimentsVsRandom"  # experiment name
    isLogging = False  # create a .txt file with the experiment log
    isPlotting = False  # create plots of the experiment
    createDataset = False  # create a .pkl dataset of the experiment
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/BaselineNumberGames"  # Directory where the experiment will be saved

    # Metrics to be saved; one sub-list per seat for the per-player metrics.
    # NOTE(review): these accumulators are never reset between evaluated
    # agents, so each DataFrame row aggregates all trials run so far —
    # confirm this is intended.
    avgTotalGames = []
    avgWonGames = []
    avgPoints = []
    avgWonRounds = []
    for a in range(4):
        avgPoints.append([])
        avgWonGames.append([])
        avgWonRounds.append([])

    columns = ["ExperimentName", "AvgTotalNumberGames", "stdNumberGames",
               "Player0_AvgPoints", "Player0_stdAvgPoints",
               "Player0_AvgWonGames", "Player0_stdAvgWonGames",
               "Player0_AvgWonRounds", "Player0_stdWonRounds",
               "Player1_AvgPoints", "Player1_stdAvgPoints",
               "Player1_AvgWonGames", "Player1_stdAvgWonGames",
               "Player1_AvgWonRounds", "Player1_stdWonRounds",
               "Player2_AvgPoints", "Player2_stdAvgPoints",
               "Player2_AvgWonGames", "Player2_stdAvgWonGames",
               "Player2_AvgWonRounds", "Player2_stdWonRounds",
               "Player3_AvgPoints", "Player3_stdAvgPoints",
               "Player3_AvgWonGames", "Player3_stdAvgWonGames",
               "Player3_AvgWonRounds", "Player3_stdWonRounds", ]
    totalDataFame = pd.DataFrame(columns=columns)

    for agent in range(4):
        # List of agents and models to load: candidate at seat 1, randoms elsewhere.
        loadModel = [possibleLoadModel1[agent], loadModelEmpty, loadModelEmpty,
                     loadModelEmpty]
        playersAgents = [possibleAgent1[agent], agent2, agent3, agent4]
        print ("Evaluating agent:" + str(playersAgents[0].name))

        for a in range(numberOfTrials):
            metrics = ChefsHatExperimentHandler.runExperiment(
                maximumScore=maximumScore, playersAgents=playersAgents,
                experimentDescriptor=experimentDescriptor, isLogging=isLogging,
                isPlotting=isPlotting, createDataset=createDataset,
                saveExperimentsIn=saveExperimentsIn, loadModel=loadModel,
                rewardFunction=reward, plots=plotsToGenerate)
            # Last metric is total games played, second-to-last the final scores.
            games = metrics[-1]
            score = metrics[-2]
            winner = numpy.argmax(score)
            for i in range(len(playersAgents)):
                # Per-player tuples start at metrics[2]; element 5 = rounds.
                playerMetric = metrics[2 + i]
                rounds = playerMetric[5]
                avgPoints[i].append(score[i])
                if winner == i:
                    # Only the winner's game/round counts are recorded.
                    avgWonGames[i].append(games)
                    avgWonRounds[i].append(numpy.mean(rounds))
            print("Trial:" + str(a) + "- Games" + str(games) + " - Winner: " + str(winner))
            avgTotalGames.append(games)

        # Assemble one summary row for the evaluated agent.
        currentDataFrame = []
        currentDataFrame.append(playersAgents[0].name)  # trained agent name
        currentDataFrame.append(numpy.mean(avgTotalGames))  # AvgTotalNumberGames
        currentDataFrame.append(numpy.std(avgTotalGames))  # stdNumberGames
        for i in range(len(playersAgents)):
            points = avgPoints[i]
            wongamesNumber = avgWonGames[i]
            wonRounds = avgWonRounds[i]
            currentDataFrame.append(numpy.mean(points))  # Player X AvgPoints
            currentDataFrame.append(numpy.std(points))  # Player X StdPoints
            currentDataFrame.append(numpy.mean(wongamesNumber))  # Player X AvgWonGames
            currentDataFrame.append(numpy.std(wongamesNumber))  # Player X StdWonGames
            currentDataFrame.append(numpy.mean(wonRounds))  # Player X AvgRounds
            currentDataFrame.append(numpy.std(wonRounds))  # Player X StdRounds

        # Prepend the row and persist after every evaluated agent.
        totalDataFame.loc[-1] = currentDataFrame
        totalDataFame.index = totalDataFame.index + 1
        totalDataFame.to_pickle(saveExperimentsIn + "/" + experimentDescriptor)
        totalDataFame.to_csv(saveExperimentsIn + "/" + experimentDescriptor + ".csv",
                             index=False, header=True)
def runModel():
    """Evaluate pre-trained DQL, A2C and PPO agents (plus a random one) without training.

    Performs ``numRuns`` runs of ``numGames`` games, collects per-player win
    metrics and Q-values, and plots total victories via ``plotVictoriesTotal``.
    """
    # Parameters for the game: three evaluation-mode learners and one random agent.
    agent1 = AgentDQL.AgentDQL([False, 1.0])  # evaluated DQL agent
    agent2 = AgentA2C.AgentA2C([False, 1.0])  # evaluated A2C agent
    agent3 = AgentPPO.AgentPPO([False, 1.0])  # evaluated PPO agent
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)  # random baseline
    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 1  # amount of runs
    numGames = 100  # amount of games per run
    experimentDescriptor = "Testing_NoTraining_newPlot"

    # Pre-trained checkpoints for the three learning agents.
    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0_2020-03-26_17:26:18.198600/Model/actor_iteration_999_Player_2.hd5"
    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch - Second Best - Best - _2020-03-26_19:11:39.863435/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch - Second Best - Best - _2020-03-26_19:11:39.863435/Model/critic_iteration_999_Player_0.hd5"
    # NOTE(review): the PPO checkpoint directories carry "A2C" in their names —
    # confirm these really are PPO weights.
    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch - Scratch - Scratch - _2020-03-26_22:27:02.430568/Model/actor_iteration_999_Player_3.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch - Scratch - Scratch - _2020-03-26_22:27:02.430568/Model/critic_iteration_999_Player_3.hd5"

    loadModelAgent1 = DQLModel               # DQL loads a single actor file
    loadModelAgent2 = [A2cActor, A2cCritic]  # actor/critic pair
    loadModelAgent3 = [PPOActor, PPOCritic]  # actor/critic pair
    loadModelAgent4 = ""                     # random agent: nothing to load

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # indicate where the saved model is

    # Parameters for controlling the experiment.
    isLogging = False  # log the experiment
    isPlotting = False  # plot the experiment
    plotFrequency = 1  # plot the plots every X games
    createDataset = True  # whether to save the dataset
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/AllQValues_beforetraining/"  # Directory where the experiment will be saved

    # One win-metric list per seat, one entry per run.
    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []
    # One Q-value list per seat (the random agent's slot stays empty, see below).
    qvalues = []
    for a in range(len(playersAgents)):
        qvalues.append([])

    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)
        # Per-player metric tuples start at index 2; element 0 is the win count.
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]
        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])
        # Only the first three seats — presumably because the random agent
        # has no Q-values — TODO confirm.
        for a in range(len(playersAgents) - 1):
            qvalues[a].append(metrics[a + 2][-1])
        # print ("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)