Example #1
import sys

import numpy as np
import rospy

# Project-local imports. The module names below are assumptions made so the
# example is self-contained; adjust them to the actual package layout.
from configuration import Configuration
from agent_tf import AgentTF
from data_set import DataSet, loadDataSet, saveDataSet
from learning_file import openLearningFile, updateLearningFile
from environment_control import environmentControl
from data_processor import dataProcessor


def main():
    sys.setrecursionlimit(2000)

    config = Configuration()

    with open(config.DATA_FOLDER + '/config.txt', 'r') as f:
        configFile = f.read().split(',')

    print('Parameters', configFile)
    config.EPSILON_START = float(configFile[0])
    config.LOAD_NET_NUMBER = int(float(configFile[1]))
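    # Note: config.txt holds two comma-separated values, e.g. "1.0,0"
    # (starting epsilon, step count of the network to load); the same
    # format is written back on shutdown at the end of this function.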

    agentTF = AgentTF(config.STATE_SIZE, config.PHI_LENGTH, config.ACTION_SIZE,
                      config.HIDDEN_LAYERS, config.BATCH_SIZE, config.TAU,
                      config.GAMMA)

    if config.LOAD_NET_NUMBER > 0:
        dataSet = loadDataSet(config.DATA_FOLDER, config.LOAD_NET_NUMBER)
        agentTF.restore_model(config.DATA_FOLDER)
        countTotalSteps = config.LOAD_NET_NUMBER
    else:
        # Initialize DataSet
        dataSet = DataSet(config.STATE_SIZE, config.REPLAY_MEMORY_SIZE,
                          config.PHI_LENGTH, config.RNG)
        countTotalSteps = 0

        openLearningFile(config.DATA_FOLDER)

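    # Bring up the simulated environment: spawn the robot and the goal,
    # then move the robot to a random start pose.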
    eC = environmentControl(config.PATH_ROBOT, config.PATH_GOAL,
                            config.PATH_LAUNCHFILE)
    eC.spawn(config.ROBOT_NAME)
    eC.spawnGoal()
    eC.setRandomModelState(config.ROBOT_NAME)
    #eC.pause()

    dP = dataProcessor(eC, config.ROBOT_NAME, config.PHI_LENGTH,
                       config.STATE_SIZE, config.NUM_SENSOR_VAL,
                       config.SENSOR_RANGE_MAX, config.SENSOR_RANGE_MIN,
                       config.VEL, config.VEL_CURVE, config.UPDATE_TIME,
                       config.SPEED_UP)

    lastState = np.zeros((1, config.STATE_SIZE))
    lastReward = 0
    lastAction = 0

    countSteps = 0
    batchCount = 0
    lossAverages = np.empty([0])
    epochCount = 0

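    # Epsilon-greedy exploration: epsilon starts high and is decayed once
    # per epoch in the loop below, never dropping under EPSILON_MIN.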
    epsilon = max(config.EPSILON_START, config.EPSILON_MIN)
    epsilonRate = config.EPSILON_DECAY

    quit = False

    try:
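        # Warm-up: take a few random actions so that dataSet.phi() has
        # enough history to build a full state stack (presumably
        # PHI_LENGTH frames).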
        for i in range(4):
            action = np.random.randint(config.ACTION_SIZE)
            dP.action(action)

            state, reward = dP.getStateReward()
            dataSet.addSample(lastState, action, reward, state, dP.isGoal)
            countTotalSteps += 1
            countSteps += 1
            lastState = state
        # A negative start epsilon in config.txt presumably serves as a
        # manual kill switch.
        if config.EPSILON_START < 0:
            quit = True
        while not quit:
            if countTotalSteps % 1000 == 0:
                updateLearningFile(config.DATA_FOLDER, lossAverages,
                                   countTotalSteps)
                lossAverages = np.empty([0])
                print(countTotalSteps)

            phi = dataSet.phi(lastState)
            action = agentTF.getAction(phi, epsilon)
            #action=userAction()
            eC.unpause()
            dP.action(action)
            state, reward = dP.getStateReward()
            eC.pause()

            if dP.isGoal:
                print('The goal was reached in ', countSteps, ' steps')
                countSteps = 1
                eC.setRandomModelState(config.ROBOT_NAME)
                eC.setRandomModelState('goal')
                dP.isGoal = False

            if dP.flipped:
                eC.setRandomModelState(config.ROBOT_NAME)
                dP.flipped = False

            # After NUM_STEPS the chance is over
            if countSteps % config.NUM_STEPS == 0:
                countSteps = 1
                reward -= 1
                eC.setRandomModelState(config.ROBOT_NAME)
                eC.setRandomModelState('goal')
                print('Your chance is over! Try again ...')

            #print(reward)

            dataSet.addSample(lastState, action, reward, state, dP.isGoal)

            # Training
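            # Train only once the replay memory holds REPLAY_START_SIZE
            # samples, and then only every 5th step, on a random minibatch
            # of stored transitions.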
            if countTotalSteps > config.REPLAY_START_SIZE and countTotalSteps % 5 == 0:
                batchStates, batchActions, batchRewards, batchNextStates, \
                    batchTerminals = dataSet.randomBatch(config.BATCH_SIZE)
                loss = agentTF.train(batchStates, batchActions, batchRewards,
                                     batchNextStates, batchTerminals)
                #print('Loss', loss)
                # count how many training batches have been run
                batchCount += 1
                # add loss to lossAverages
                lossAverages = np.append(lossAverages, loss)

            # Update epsilon and checkpoint the network and dataset once per epoch
            if countTotalSteps % config.SIZE_EPOCH == 0:
                # Number of Epochs
                epochCount += 1

                # Update Epsilon
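                # Stop once a further decay step would drop epsilon more
                # than 0.01 below EPSILON_MIN, i.e. it has already settled
                # at its minimum.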
                if (epsilon - epsilonRate) < config.EPSILON_MIN - 0.01:
                    quit = True
                epsilon = max(epsilon - epsilonRate, config.EPSILON_MIN)
                print('Epsilon updated to: ', epsilon)

                agentTF.save_model(countTotalSteps, config.DATA_FOLDER)
                saveDataSet(config.DATA_FOLDER, countTotalSteps, dataSet)
            lastState = state
            countTotalSteps += 1
            countSteps += 1

    except rospy.exceptions.ROSException:
        agentTF.save_model(countTotalSteps, config.DATA_FOLDER)
        saveDataSet(config.DATA_FOLDER, countTotalSteps, dataSet)
        agentTF.close()
        eC.close()

        with open(config.DATA_FOLDER + '/config.txt', 'w') as f:
            out = "{},{}".format(epsilon, countTotalSteps)
            f.write(out)
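
The example above defines main() but never calls it. A minimal entry point, assuming the script is meant to be run directly, could be:

if __name__ == '__main__':
    main()
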
Example #2
		state, reward = dP.getStateReward()
		action = np.random.randint(config.ACTION_SIZE)
		dP.action(action)
		dataSet.addSample(state, action, reward, dP.isGoal)
		countTotalSteps += 1
		countSteps += 1

	while not quit:
		if countTotalSteps % 1000 == 0:
			print(countTotalSteps)

		state, reward = dP.getStateReward()
		phi = dataSet.phi(state)
		#print('phi: ', phi)
		action = network.choose_action(phi, epsilon)
		#action=np.random.randint(config.ACTION_SIZE)
		#time.sleep(0.5)
		dP.action(action)
		#print('state: ', state)
		#print('reward: ', reward)
		#print('action: ', action)

		# Check every 20 steps whether the robot flipped or the goal was reached
		if countSteps % 20 == 0:
			if dP.isGoal:
				countSteps = 1
				eC.setRandomModelState(config.ROBOT_NAME)
				eC.setRandomModelState('goal')