Example #1
    def configure(self, environment_definitions, port_inc=0):
        self.action_space = spaces.Discrete(
            environment_definitions['action_shape'][0])
        min_value = environment_definitions['min_value']
        max_value = environment_definitions['max_value']
        state_shape = environment_definitions['state_shape']
        state_type = environment_definitions['state_type']
        self.observation_space = spaces.Box(low=min_value,
                                            high=max_value,
                                            shape=state_shape,
                                            dtype=state_type)
        self.n_envs = environment_definitions['n_envs']
        self.actions = environment_definitions['actions']
        if 'action_meaning' in environment_definitions:
            self.action_meaning = environment_definitions['action_meaning']
        else:
            self.action_meaning = ['action'] * len(self.actions)

        if 'agent' in environment_definitions:
            if inspect.isclass(environment_definitions['agent']):
                self.agent = environment_definitions['agent']()
            else:
                raise TypeError("environment_definitions['agent'] must be a class, not an instance")
        else:
            self.agent = BasicAgent()

        host = environment_definitions['host']
        input_port = environment_definitions['input_port']
        output_port = environment_definitions['output_port']
        self.remoteenv = RemoteEnv(host, output_port + port_inc,
                                   input_port + port_inc)
        self.remoteenv.open(0)
        self.nlives = 1
        self.configureFlag = True
        self.state = None
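
For reference, here is a minimal sketch of an environment_definitions dict accepted by this configure method. The keys are exactly the ones the code above reads; the concrete values (ports, shapes, dtypes) are illustrative assumptions only:

import numpy as np

environment_definitions = {
    'action_shape': (4,),            # becomes spaces.Discrete(4)
    'min_value': 0.0,                # observation lower bound
    'max_value': 255.0,              # observation upper bound
    'state_shape': (20, 20),         # becomes the Box shape
    'state_type': np.float32,        # becomes the Box dtype
    'n_envs': 1,
    'actions': [('act', 0), ('act', 1)],
    'action_meaning': ['left', 'right'],  # optional
    # 'agent': MyAgent,              # optional; must be a class, not an instance
    'host': '127.0.0.1',             # illustrative
    'input_port': 8080,              # illustrative
    'output_port': 7070,             # illustrative
}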
Example #2
from unityremote.core import RemoteEnv

# image_decode() below is a frame-decoding helper assumed to be defined
# elsewhere in the original script.
def agent():
    env = RemoteEnv(IN_PORT=8080, OUT_PORT=7070)
    env.open(0)
    for i in range(10000000):
        sum_energy = 0
        state = env.step("restart")
        prev_energy = state['energy']
        print(prev_energy)
        done = state['done']
        while not done:
            frame = image_decode(state['frame'], 20, 20)
            print(frame)
            action = int(input('action'))
            for _ in range(8):  # repeat the chosen action for 8 frames
                state = env.step("act", action)
            state = env.step('get_status')
            done = state["done"]
            energy = state['energy']
            frame = state['frame']
            touched = state['touched']
            ID = state['id']
            delta = (energy - prev_energy)
            print(delta)
            sum_energy += delta
            prev_energy = energy
        print(sum_energy)
    env.close()
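
Example #2 derives its reward as the change in the agent's energy between consecutive status reads. A tiny hypothetical helper (the name is mine, not the library's) that captures the pattern:

def energy_delta(state, prev_energy):
    # Return the energy change since the last read plus the new baseline.
    energy = state['energy']
    return energy - prev_energy, energy

Inside the loop, the delta bookkeeping then reduces to delta, prev_energy = energy_delta(state, prev_energy).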
Example #3
import numpy as np
from threading import Thread

from unityremote.core import RemoteEnv

# build_model, KerasDQNAgent, run_worker and get_state_from_fields are
# assumed to be defined elsewhere in the original project.
def train():
    actions = [("fx", 0.1), ("fx", -0.1), ("fz", 0.1), ("fz", -0.1)]
    agent = KerasDQNAgent(build_model, action_size=4, decay_steps=20000)
    agent.eps_min = 0.001
    t = Thread(target=run_worker, args=(agent, ))
    t.start()
    env = RemoteEnv()
    env.open(0)
    fields = None
    total_steps = 0
    for e in range(1000000):
        print("Starting new episode ", e)
        fields = env.step("restart")
        done = fields['done']
        reward = 0
        initial_state = get_state_from_fields(fields).reshape(1, 7)
        steps = 0
        sum_rewards = 0
        while not done:
            if total_steps >= 10000:
                # enable training once a warm-up buffer of samples exists
                agent.training = True
                action = agent.predict(initial_state)
            else:
                action = agent.predict(initial_state, True)

            reward = 0.0

            for _ in range(4):
                fields = env.step(actions[action][0], actions[action][1])
                reward += fields['reward']
                if fields['done']:
                    break
            final_state = get_state_from_fields(fields).reshape(1, 7)

            reward = np.clip(reward, -1.0, 1.0)
            sum_rewards += reward
            done = fields['done']
            if total_steps % 20 == 0:
                print("\nLOSS ", agent.last_loss, "\n")

            agent.add_sample(initial_state, action, final_state, reward, done)

            if steps >= 2000 and not done:
                done = True  # cap episode length at 2000 steps
            if not done:
                initial_state = np.copy(final_state)

            steps += 1
            total_steps += 1

        print("Sum of rewards ", sum_rewards, ", Steps by episode: ", steps,
              ", Total Steps: ", total_steps, ", EPS: ", agent.eps,
              ", TOTAL SAMPLES: ", agent.mem_size())
        if e % 50 == 0:
            agent.model.save_weights("model.h5", save_format='h5')

    agent.done = True
    agent.training = False
    env.close()
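
The inner for _ in range(4) loop above is a frame-skip: the selected action is repeated for four simulation steps while rewards accumulate, stopping early if the episode ends. A hypothetical helper (name and signature are mine) that factors the pattern out:

def skip_frames(env, action_name, action_value, n=4):
    # Repeat one action for n steps, summing rewards and stopping
    # early if the episode ends mid-skip.
    total_reward = 0.0
    for _ in range(n):
        fields = env.step(action_name, action_value)
        total_reward += fields['reward']
        if fields['done']:
            break
    return fields, total_reward

With it, the training loop's inner block reduces to fields, reward = skip_frames(env, *actions[action]).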
Example #4
from unityremote.core import RemoteEnv

# image_from_str() below is assumed to be defined elsewhere in the original script.
def agent():
    env = RemoteEnv(IN_PORT=8081, OUT_PORT=7071)
    env.open(0)
    for i in range(10000000):
        state = env.step("restart")
        done = state['done']
        print('new episode')
        while not done:
            frame = image_from_str(state['state'], 10, 10)
            print(frame)
            action = int(input('action'))
            state = env.step("move", action)
            done = state['done']
            print(state)
    env.close()
Example #5
from unityremote.core import RemoteEnv

# print_matrix and get_frame_from_fields are assumed to be defined
# elsewhere in the original script.
def run_agent(inport, outport):
    env = RemoteEnv(IN_PORT=inport, OUT_PORT=outport)
    env.open(0)
    env.step("restart")
    speed = 100
    angular_speed = 50

    actions = [('walk', 15), ('run', 30), ('walk_in_circle', 1),
               ('left_turn', 1), ('right_turn', 1), ('up', 1),
               ('down', 1), ('jump', True), ('pickup', True),
               ('pickup', False), ('noop', -1)]

    action_size = len(actions)

    for i in range(100000):
        sum_rewards = 0
        touchID = 0
        energy = 0
        # idx = np.random.choice(len(actions))
        idx = int(input())
        for _ in range(8):  # repeat the chosen action for 8 frames
            env_info = env.step(actions[idx][0], actions[idx][1])
            done = env_info['done']
            if done:
                break
        if not done:
            env_info = env.step('get_result', -1)
        sum_rewards += env_info['reward']
        touchID = env_info['touchID']
        energy = env_info['energy']
        if done:
            env_info = env.step('restart', -1)
        print("Object touched ---------------- ", touchID)
        print("Reward sum -------------------- ", sum_rewards)
        print("Done ---------------------------", env_info['done'])
        print("--------------------------------------------------------------------------------------")
        print_matrix(get_frame_from_fields(env_info['frame']))
        print("-----------------------------------------------")
    env.close()
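
The commented-out np.random.choice line hints at a scripted variant of this agent. A minimal sketch of that variant, sampling the action index uniformly instead of reading it from the keyboard (the function name and cycle count are assumptions):

import numpy as np

def run_random_agent(env, actions, cycles=1000):
    # Same control loop as above, but with a uniformly random policy.
    env.step("restart")
    for _ in range(cycles):
        idx = np.random.choice(len(actions))
        for _ in range(8):
            env_info = env.step(actions[idx][0], actions[idx][1])
            if env_info['done']:
                break
        if not env_info['done']:
            env_info = env.step('get_result', -1)
        reward = env_info['reward']
        if env_info['done']:
            env.step('restart', -1)
        print(reward)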
Example #6
    def configure(self, environment_definitions, port_inc=0):
        self.action_space = spaces.Discrete(
            environment_definitions['action_shape'][0])
        min_value = environment_definitions['min_value']
        max_value = environment_definitions['max_value']
        state_shape = environment_definitions['state_shape']
        state_type = environment_definitions['state_type']
        self.observation_space = spaces.Box(low=min_value,
                                            high=max_value,
                                            shape=state_shape,
                                            dtype=state_type)
        self.n_envs = environment_definitions['n_envs']
        self.actions = environment_definitions['actions']
        self.action_meaning = environment_definitions['action_meaning']
        self.state_wrapper = environment_definitions['state_wrapper']
        host = environment_definitions['host']
        input_port = environment_definitions['input_port']
        output_port = environment_definitions['output_port']
        self.remoteenv = RemoteEnv(host, output_port + port_inc,
                                   input_port + port_inc)
        self.remoteenv.open(0)
        self.nlives = 1
Example #7
from unityremote.core import RemoteEnv


def agent():
    env = RemoteEnv(IN_PORT=8085, OUT_PORT=7075)
    env.open(0)
    for i in range(10000000):
        state = env.step("restart")
        prev_energy = state['energy']
        done = state['done']
        while not done:
            action = int(input('action'))
            #action = np.random.choice([0, 1])
            reward_sum = 0
            touched = -1
            for _ in range(8):  # repeat the chosen action for 8 frames
                state = env.step("act", action)
                state = env.step('get_status')
                energy = state['energy']
                # remember the first touch, then overwrite it only when a
                # non-trivial touch id (outside {0, 1}) shows up
                if touched == -1 or state['touched'] not in (0, 1):
                    touched = state['touched']
                if state['done']:
                    break
            done = state['done']
            reward_sum += (energy - prev_energy)
            prev_energy = energy
            ID = state['id']
            frame = image_decode(state['frame'], 20, 20)
            print(frame)
            print(reward_sum)
            print(touched)
            print(state['signal'])
            print(state['done'])
        if i >= 2:  # run only the first three episodes
            break
    env.close()
Example #8
from unityremote.core import RemoteEnv
import numpy as np

if __name__ == "__main__":
    # open and close the connection inside the main guard so that
    # importing this module has no side effects
    env = RemoteEnv()
    env.open(0)
    for i in range(10):
        sum_reward = 0
        state = env.step("restart")
        print(state)
        done = state['done']
        while not done:
            action = int(
                input("action-----------------------------------------"))
            if action == 0:
                state = env.step("fx", 0.1)
            elif action == 1:
                state = env.step("fz", 0.1)
            else:
                state = env.step("noop", 0.0)

            done = state['done']
            reward = state['reward']
            sum_reward += reward
        print(sum_reward)
    env.close()
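
The if/elif chain above maps a discrete action index to a (command, value) pair for env.step. The same dispatch can be written as a lookup table; a small sketch whose entries simply restate the branches above:

ACTION_TABLE = {0: ("fx", 0.1), 1: ("fz", 0.1)}

def apply_action(env, action):
    # Any index outside the table falls back to a no-op,
    # mirroring the else branch above.
    command, value = ACTION_TABLE.get(action, ("noop", 0.0))
    return env.step(command, value)

With this helper, the body of the while loop becomes state = apply_action(env, action).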