예제 #1
0
	def predict(self,state):
		"""Enumerate joint next positions of the two ghosts with their weights.

		``state[2:4]`` and ``state[4:6]`` are converted to row indices with
		``utils.state_2_number``. For each ghost, every column of the matching
		``self.model`` row holding a strictly positive value is a candidate
		successor. All (ghost 1, ghost 2) candidate pairs are emitted, ghost 1
		varying slowest.

		Returns:
			``(li, li_probs)``: ``li[k]`` is the ``+`` combination of the two
			``utils.number_2_state`` results for pair ``k``; ``li_probs[k]`` is
			the product of the two corresponding table entries.
		"""
		def positive_entries(row):
			# Keep only the columns whose entry is strictly positive.
			successors=[]
			weights=[]
			for successor,weight in enumerate(row):
				if weight>0:
					successors.append(successor)
					weights.append(weight)
			return successors,weights

		first_row_id=utils.state_2_number(state[2:4])
		second_row_id=utils.state_2_number(state[4:6])
		first_next,first_probs=positive_entries(self.model[first_row_id])
		second_next,second_probs=positive_entries(self.model[second_row_id])

		li=[]
		li_probs=[]
		for first,first_p in zip(first_next,first_probs):
			for second,second_p in zip(second_next,second_probs):
				# Both ghosts are treated as moving independently, hence the product.
				li.append(utils.number_2_state(first)+utils.number_2_state(second))
				li_probs.append(first_p*second_p)

		return li,li_probs
예제 #2
0
def run(run_ID):
	"""Estimate a 49x49 transition table from synthetic samples and save it.

	Each (state, next_state) pair returned by ``load_synthetic_data(5*49)`` is
	mapped to a (row, column) index with ``utils.state_2_number`` and counted.
	Every row is then divided by its total so that it sums to one.

	Rows that received no samples are left as all zeros; dividing them by
	their zero total would otherwise fill the row with NaN.

	Args:
		run_ID: value appended (via ``str``) to the output file name.

	Side effects:
		Writes the table with ``numpy.savetxt`` to
		``learn_tabular_models/tabular<run_ID>.h5``. The file is plain text
		despite the ``.h5`` suffix.
	"""
	states,next_states=load_synthetic_data(5*49)
	ghost_tabular_array=numpy.zeros((49,49))
	for x,y in zip(states,next_states):
		numberS=utils.state_2_number(x)
		numberSp=utils.state_2_number(y)
		ghost_tabular_array[numberS,numberSp]+=1
	# Normalise only the rows that were actually observed (avoids 0/0 -> NaN).
	row_totals=ghost_tabular_array.sum(axis=1,keepdims=True)
	observed=row_totals[:,0]>0
	ghost_tabular_array[observed]=ghost_tabular_array[observed]/row_totals[observed]
	numpy.savetxt("learn_tabular_models/tabular"+str(run_ID)+".h5",ghost_tabular_array)
예제 #3
0
        for y in range(7):
            s = [x, y]
            for _ in range(each):
                while True:
                    case = numpy.random.randint(0, 4)
                    if case == 0:
                        s_p = [x + 1, y]
                    elif case == 1:
                        s_p = [x, y + 1]
                    elif case == 2:
                        s_p = [x - 1, y]
                    elif case == 3:
                        s_p = [x, y - 1]
                    if numpy.min(s_p) >= 0 and numpy.max(s_p) <= 6:
                        break
                li_s.append(s)
                li_sprime.append(s_p)
    return li_s, li_sprime


# Build a 49x49 transition table from synthetic (state, next_state) samples:
# count every observed transition, then turn each row into relative frequencies.
states, next_states = load_synthetic_data(5 * 49)
ghost_tabular_array = numpy.zeros((49, 49))
for x, y in zip(states, next_states):
    numberS = utils.state_2_number(x)
    numberSp = utils.state_2_number(y)
    ghost_tabular_array[numberS, numberSp] += 1
for index in range(ghost_tabular_array.shape[0]):
    row_total = numpy.sum(ghost_tabular_array[index, :])
    # A row with no samples stays all-zero; dividing by its zero total would
    # fill it with NaN.
    if row_total > 0:
        ghost_tabular_array[index, :] = ghost_tabular_array[index, :] / row_total
# NOTE: savetxt writes plain text; the ".h5" suffix is only a file name.
numpy.savetxt("tabular.h5", ghost_tabular_array)
예제 #4
0
    def predict(self, state, action):
        """Return the candidate next states for ``state`` under ``action``.

        The first two entries of ``state`` are treated as the pacman part and
        the remaining entries as the ghost part. The pacman successor always
        comes from ``self.other_models_object.pacman_model``; how the ghost
        successors are produced depends on ``self.type``:

        * ``'stochastic'``: one candidate per element returned by
          ``self.em_model_object.predict``.
        * ``'deterministic'``: a single candidate from
          ``self.other_models_object.ghosts_model``.
        * ``'tabular'``: one candidate per column ``j`` in ``range(49 * 49)``
          whose entry in row ``utils.state_2_number(ghost_state)`` of
          ``ghosts_tabular_model`` is positive.
        * ``'random'``: no prediction is made; the process exits with
          status 1.

        Args:
            state: flat sequence of numbers (pacman part, then ghost part).
            action: integer index into a length-4 one-hot action vector.

        Returns:
            ``(li_next_states, li_rewards, li_dones)``, three parallel lists:
            each next state as a plain list, the ``[0, 0]`` entry of the
            reward model output for it, and ``True`` when the done model
            output exceeds 0.5. Any other ``self.type`` falls through and
            returns ``None``.
        """
        action_array = numpy.array(4 * [0]).reshape(1, 4)
        action_array[0, action] = 1

        ghost_state = state[2:]
        ghost_state_array = numpy.array(ghost_state).reshape(
            1, len(ghost_state))

        pacman_state = state[0:2]
        pacman_state_array = numpy.array(pacman_state).reshape(
            1, len(pacman_state))

        # NOTE(review): only the 'stochastic' branch consumes this result; the
        # call is kept unconditional so the calls made on em_model_object are
        # unchanged. Confirm it is side-effect free before moving it.
        li_ghosts = self.em_model_object.predict(ghost_state_array)
        li_next_states = []
        li_rewards = []
        li_dones = []

        def predict_pacman():
            # Pacman's successor depends only on its own state and the action.
            return self.other_models_object.pacman_model.predict(
                [pacman_state_array, action_array])

        def record(pacman_next_state, ghost_next_state):
            # Score one joint successor and append it to the three result lists.
            next_state = numpy.concatenate(
                (pacman_next_state, ghost_next_state), axis=1)
            reward = self.other_models_object.reward_model.predict(next_state)
            done = self.other_models_object.done_model.predict(next_state)
            li_next_states.append(next_state[0].tolist())
            li_rewards.append(reward[0, 0])
            li_dones.append(bool(done > 0.5))

        if self.type == 'stochastic':
            pacman_next_state = predict_pacman()
            for gh in li_ghosts:
                record(pacman_next_state, gh)
            return li_next_states, li_rewards, li_dones

        elif self.type == 'deterministic':
            pacman_next_state = predict_pacman()
            ghost_next_state = self.other_models_object.ghosts_model.predict(
                ghost_state_array)
            record(pacman_next_state, ghost_next_state)
            return li_next_states, li_rewards, li_dones
        elif self.type == 'tabular':
            state_number = utils.state_2_number(ghost_state)
            # The pacman prediction is the same for every ghost successor, so
            # it is computed once instead of once per positive table entry.
            pacman_next_state = predict_pacman()
            for j in range(49 * 49):
                if self.other_models_object.ghosts_tabular_model[state_number,
                                                                 j] > 0:
                    ghost_next_state = numpy.array(
                        utils.number_2_state(j)).reshape(1, 4)
                    record(pacman_next_state, ghost_next_state)
            return li_next_states, li_rewards, li_dones
        elif self.type == 'random':
            # Planner is random: there is no model to query, so abort.
            sys.exit(1)