def main():
    """Train the DQN navigation agent in the Gazebo/ROS environment.

    Reads ``config.txt`` from the data folder to resume (epsilon, step
    counter), builds the TF agent and replay memory, spawns robot and goal,
    then runs the act/observe/train loop until the epsilon schedule is
    exhausted or ROS shuts down.  On exit (normal or not) the model and
    dataset are saved as far as possible and ``config.txt`` is rewritten so
    the next run can resume.

    Side effects: reads/writes files under ``config.DATA_FOLDER``, spawns
    and moves models in the simulator, trains and checkpoints the network.
    """
    sys.setrecursionlimit(2000)
    config = Configuration()

    # Resume state: "<epsilon>,<total step count>" written by a previous run.
    with open(config.DATA_FOLDER + '/config.txt', 'r') as f:
        configFile = f.read().split(',')
    print('Parameters', configFile)
    config.EPSILON_START = float(configFile[0])
    config.LOAD_NET_NUMBER = int(float(configFile[1]))

    agentTF = AgentTF(config.STATE_SIZE, config.PHI_LENGTH, config.ACTION_SIZE,
                      config.HIDDEN_LAYERS, config.BATCH_SIZE, config.TAU,
                      config.GAMMA)

    if config.LOAD_NET_NUMBER > 0:
        # Resume: restore replay memory and network weights from disk.
        dataSet = loadDataSet(config.DATA_FOLDER, config.LOAD_NET_NUMBER)
        agentTF.restore_model(config.DATA_FOLDER)
        countTotalSteps = config.LOAD_NET_NUMBER
    else:
        # Fresh start: empty replay memory and a new learning log.
        dataSet = DataSet(config.STATE_SIZE, config.REPLAY_MEMORY_SIZE,
                          config.PHI_LENGTH, config.RNG)
        countTotalSteps = 0
        openLearningFile(config.DATA_FOLDER)

    # Bring up the simulation: spawn robot and goal, randomize the start pose.
    eC = environmentControl(config.PATH_ROBOT, config.PATH_GOAL,
                            config.PATH_LAUNCHFILE)
    eC.spawn(config.ROBOT_NAME)
    eC.spawnGoal()
    eC.setRandomModelState(config.ROBOT_NAME)

    dP = dataProcessor(eC, config.ROBOT_NAME, config.PHI_LENGTH,
                       config.STATE_SIZE, config.NUM_SENSOR_VAL,
                       config.SENSOR_RANGE_MAX, config.SENSOR_RANGE_MIN,
                       config.VEL, config.VEL_CURVE, config.UPDATE_TIME,
                       config.SPEED_UP)

    lastState = np.zeros((1, config.STATE_SIZE))
    countSteps = 0          # steps in the current episode/attempt
    batchCount = 0          # number of training batches run
    lossAverages = np.empty([0])  # losses accumulated since the last log flush
    epochCount = 0
    epsilon = max(config.EPSILON_START, config.EPSILON_MIN)
    epsilonRate = config.EPSILON_DECAY
    done = False            # renamed from `quit` (shadowed the builtin)

    try:
        # Warm-up: a few random transitions so dataSet.phi() has history.
        for _ in range(4):
            action = np.random.randint(config.ACTION_SIZE)
            dP.action(action)
            state, reward = dP.getStateReward()
            dataSet.addSample(lastState, action, reward, state, dP.isGoal)
            countTotalSteps += 1
            countSteps += 1
            lastState = state

        # A negative epsilon in config.txt marks a finished run: do nothing.
        # (original read `< -0`, an obvious typo for `< 0`)
        if config.EPSILON_START < 0:
            done = True

        while not done:
            if countTotalSteps % 1000 == 0:
                # Periodically flush accumulated losses to the learning log.
                updateLearningFile(config.DATA_FOLDER, lossAverages,
                                   countTotalSteps)
                lossAverages = np.empty([0])
                print(countTotalSteps)

            # epsilon-greedy action on the stacked recent-state input.
            phi = dataSet.phi(lastState)
            action = agentTF.getAction(phi, epsilon)

            # Only let the simulation run while the robot executes the action.
            eC.unpause()
            dP.action(action)
            state, reward = dP.getStateReward()
            eC.pause()

            if dP.isGoal:
                print('The goal was reached in ', countSteps, ' steps')
                countSteps = 1
                eC.setRandomModelState(config.ROBOT_NAME)
                eC.setRandomModelState('goal')
                dP.isGoal = False

            if dP.flipped:
                # Robot tipped over: just respawn it at a random pose.
                eC.setRandomModelState(config.ROBOT_NAME)
                dP.flipped = False

            # After NUM_STEPS the chance is over: penalize and reset episode.
            if countSteps % config.NUM_STEPS == 0:
                countSteps = 1
                reward -= 1
                eC.setRandomModelState(config.ROBOT_NAME)
                eC.setRandomModelState('goal')
                print('Your chance is over! Try again ...')

            dataSet.addSample(lastState, action, reward, state, dP.isGoal)

            # Train every 5th step once the replay memory is warm enough.
            if (countTotalSteps > config.REPLAY_START_SIZE
                    and countTotalSteps % 5 == 0):
                batchStates, batchActions, batchRewards, batchNextStates, \
                    batchTerminals = dataSet.randomBatch(config.BATCH_SIZE)
                loss = agentTF.train(batchStates, batchActions, batchRewards,
                                     batchNextStates, batchTerminals)
                batchCount += 1
                lossAverages = np.append(lossAverages, loss)

            # End of epoch: decay epsilon and checkpoint network + dataset.
            if countTotalSteps % config.SIZE_EPOCH == 0:
                epochCount += 1
                # Stop once the schedule would push epsilon below the floor.
                if (epsilon - epsilonRate) < config.EPSILON_MIN - 0.01:
                    done = True
                epsilon = max(epsilon - epsilonRate, config.EPSILON_MIN)
                print('Epsilon updated to: ', epsilon)
                agentTF.save_model(countTotalSteps, config.DATA_FOLDER)
                saveDataSet(config.DATA_FOLDER, countTotalSteps, dataSet)

            lastState = state
            countTotalSteps += 1
            countSteps += 1
    except rospy.exceptions.ROSException:
        # ROS went down (e.g. Gazebo closed): save progress before exiting.
        agentTF.save_model(countTotalSteps, config.DATA_FOLDER)
        saveDataSet(config.DATA_FOLDER, countTotalSteps, dataSet)
    finally:
        # Always release the session/simulator and persist resume state,
        # even if an unexpected exception escapes the loop (the original
        # skipped this cleanup on any non-ROS exception).
        agentTF.close()
        eC.close()
        with open(config.DATA_FOLDER + '/config.txt', 'w') as f:
            out = "{},{}".format(epsilon, countTotalSteps)
            f.write(out)
state,reward=dP.getStateReward() action=np.random.randint(config.ACTION_SIZE) dP.action(action) dataSet.addSample(state, action, reward, dP.isGoal) countTotalSteps+=1 countSteps+=1 while not quit: if countTotalSteps%1000==0: print(countTotalSteps) state,reward=dP.getStateReward() phi=dataSet.phi(state) #print('phi: ', phi) action=network.choose_action(phi, epsilon) #action=np.random.randint(config.ACTION_SIZE) #time.sleep(0.5) dP.action(action) #print('state: ', state) #print('reward: ', reward) #print('action: ', action) # Check every 100 steps if is Flipped and Goal was reached if countSteps % 20 == 0: if dP.isGoal: countSteps = 1 eC.setRandomModelState(config.ROBOT_NAME) eC.setRandomModelState('goal')