def agent():
    env = RemoteEnv(IN_PORT=8080, OUT_PORT=7070, host="127.0.0.1")
    env.open(0)
    for episode in range(10000000):
        state = env.step("restart")
        prev_energy = state['energy']
        done = state['done']
        print("OK")
        while not done:
            action = int(input('action'))
            #action = np.random.choice([0, 1])
            reward_sum = 0
            touched = np.zeros(8)
            # Repeat the chosen action for up to 8 steps, accumulating
            # reward and recording which steps registered a touch.
            for i in range(8):
                state = env.step("act", action)
                energy = state['energy']
                touched[i] = state['touched']
                reward_sum += state['reward']
                if state['done']:
                    break
            done = state['done']
            prev_energy = energy
            frame = image_decode(state['frame'], 20, 20)
            print(frame)
            print('Reward: ', reward_sum)
            print('Touched: ', touched)
            print('Signal: ', state['signal'])
            print('Done: ', state['done'])
        if episode >= 2:
            break
    env.close()
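
Every example on this page leans on image_decode to turn the environment's encoded frame field into a 2-D array, but its implementation is not shown. As a rough sketch of what such a decoder might do, assuming the frame arrives as a base64-encoded byte string (the function name, the encoding, and the dtype handling below are all assumptions, not the library's actual code):

import base64
import numpy as np

def image_decode_sketch(frame, width, height, dtype=np.uint8):
    # Hypothetical decoder: base64 string -> flat bytes -> (height, width) array.
    raw = base64.b64decode(frame)
    arr = np.frombuffer(raw, dtype=np.uint8).astype(dtype)
    return arr[:width * height].reshape(height, width)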
Example #2
def get_frame_from_fields(fields, touchs):
    # Decode the 20x20 camera frame and append the touch sensor
    # readings as one extra row of proprioceptive data.
    imgdata = image_decode(fields['frame'], 20, 20)
    proprioceptions = np.zeros(ARRAY_SIZE) + 0.0001
    for i in range(TOUCH_SIZE):
        proprioceptions[i] = touchs[i]
    proprioception = np.array(proprioceptions, dtype=np.float32)
    imgdata = np.vstack([imgdata, proprioception])
    return imgdata
Example #3
def get_frame_from_fields(frame, touchs):
    # Same idea as the previous example, but decodes to uint8 instead of float.
    imgdata = image_decode(frame, 20, 20, np.uint8)
    proprioceptions = np.zeros(ARRAY_SIZE)
    for i in range(TOUCH_SIZE):
        proprioceptions[i] = touchs[i]
    proprioception = np.array(proprioceptions, dtype=np.uint8)
    imgdata = np.vstack([imgdata, proprioception])
    return imgdata
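
Both variants above append the touch readings as one extra row under the decoded image, so ARRAY_SIZE has to equal the frame width for np.vstack to accept the pair. A runnable shape check of that stacking, with a dummy array standing in for the decoded frame (the constant values are illustrative assumptions, not taken from the examples):

import numpy as np

ARRAY_SIZE = 20   # assumed: must equal the frame width
TOUCH_SIZE = 8    # assumed: number of touch sensors

imgdata = np.zeros((20, 20), dtype=np.float32)    # stands in for image_decode(...)
proprioception = np.zeros(ARRAY_SIZE, dtype=np.float32)
proprioception[:TOUCH_SIZE] = 1.0                 # touch readings fill the first slots
obs = np.vstack([imgdata, proprioception])
print(obs.shape)                                  # (21, 20): 20 image rows + 1 touch row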
Example #4
    def reset(self, env):
        self.h = deque(maxlen=H_SIZE)
        env_info = env.remoteenv.step("restart")
        time.sleep(1)
        # Take a few random actions so episodes start from varied states.
        for _ in range(np.random.choice(15)):
            env_info = env.one_step(np.random.choice([0, 1, 2]))

        # Fill the history buffer with copies of the first frame.
        f = image_decode(env_info['frame'], 20, 20, dtype=np.float32)
        for _ in range(H_SIZE):
            self.h.append(f)
        return Agent.__make_state__(self.h, np.array(env_info['reward_hist']))
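
Agent.__make_state__ is not shown in these snippets. Given that self.h is a deque of H_SIZE frames and a reward history is passed alongside it, one plausible reading is that it stacks the frames into a single tensor; a hypothetical stand-in:

import numpy as np
from collections import deque

H_SIZE = 4  # assumed history length

def make_state(history, reward_hist):
    # Hypothetical equivalent of Agent.__make_state__: stack the frame
    # history into one (H_SIZE, 20, 20) array, paired with the rewards.
    return np.stack(history), reward_hist

h = deque([np.zeros((20, 20), dtype=np.float32)] * H_SIZE, maxlen=H_SIZE)
state, rewards = make_state(h, np.zeros(H_SIZE))
print(state.shape)  # (4, 20, 20)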
Example #5
    def reset(self, env):
        env.remoteenv.step("SetNMoves", 1)
        env_info = env.remoteenv.step("restart")
        for _ in range(np.random.choice(15)):
            env_info = env.one_step(np.random.choice([0, 1, 2]))
        # Seed the frame sequence with four copies of the first frame.
        img = image_decode(env_info['frame'], 20, 20)
        for _ in range(4):
            self.seq.append(img)
        return Agent.__make_state__(self.seq)
Example #6
    def act(self, env, action, info=None):
        envinfo = env.one_step(action)
        sum_reward = envinfo['reward']
        # Follow the chosen action with one step of action 3,
        # accumulating its reward, unless the episode has ended.
        for _ in range(1):
            if envinfo['done']:
                break
            else:
                envinfo = env.one_step(3)
                sum_reward += envinfo['reward']
        f = image_decode(envinfo['frame'], 20, 20, dtype=np.float32)
        self.h.append(f)
        return Agent.__make_state__(self.h, np.array(
            envinfo['reward_hist'])), sum_reward, envinfo['done'], envinfo
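
The loop above is an action-repeat pattern: the chosen action runs once, then action 3 (apparently a filler or no-op action) fills the remaining step while rewards are summed, stopping early if the episode ends. The same idea as a generic helper, with the env.one_step interface assumed from the examples:

def repeat_step(env, action, filler_action=3, n_filler=1):
    # Run the real action once, then n_filler filler steps,
    # summing rewards and stopping early when the episode ends.
    info = env.one_step(action)
    total = info['reward']
    for _ in range(n_filler):
        if info['done']:
            break
        info = env.one_step(filler_action)
        total += info['reward']
    return total, info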
Example #7
def get_frame_from_fields(fields):
    # Decode just the 20x20 frame, with no proprioceptive data appended.
    imgdata = image_decode(fields['frame'], 20, 20)
    return imgdata
Example #8
def transform(frame):
    return image_decode(frame, 20, 20)
Example #9
    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        return image_decode(obs), reward, done, info
Example #10
    def reset(self):
        state = self.env.reset()
        self.state = image_decode(state)
        # Return the decoded initial observation, per the usual reset contract.
        return self.state
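
Examples #9 and #10 decode observations inside overridden step and reset methods. If the wrapped environment follows the Gym API, gym.ObservationWrapper routes both methods through a single hook, so the decoding only has to be written once (a sketch, assuming image_decode accepts a single argument as in Example #9):

import gym

class DecodeObservation(gym.ObservationWrapper):
    # gym applies observation() to the output of both reset() and step().
    def observation(self, obs):
        return image_decode(obs)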
Example #11
def to_image(img):
    imgdata = image_decode(img, 0, 0)
    return imgdata
Example #12
    def act(self, env, action, info=None):
        reward_sum, done, env_info = self.__step(env, action)
        img = image_decode(env_info['frame'], 20, 20)
        self.seq.append(img)
        frameseq = Agent.__make_state__(self.seq)
        return frameseq, reward_sum, done, env_info