Exemplo n.º 1
0
    def main(self):
        """Connect to the proxy and service its messages, reconnecting forever.

        Each pass of the outer loop opens one connection to
        ``self.proxy_addr:self.proxy_port`` (retrying every 10 seconds while
        ``common.connect_to`` returns ``None``).  On a live connection:

        * while ``self.registered`` is false, a ``register <my_name>;``
          request is sent on every cycle;
        * once it is true, a ``ping;`` is sent on every other cycle;
        * received data is appended to a text buffer which
          ``common.get_action`` splits into ``(action, params, rest)``;
          actions named ``pong``, ``setup`` and ``registered`` are dispatched
          as ``handler(sock, params)``, unknown actions are ignored.

        ``self.registered`` is reset to ``False`` for every new connection;
        it is presumably set by one of the handlers (not visible here).

        The method does not return on its own.  A socket error, a remote
        disconnect or any other ``Exception`` ends the current connection,
        the socket is closed, and a new connection is attempted.
        ``KeyboardInterrupt`` / ``SystemExit`` are allowed to propagate (after
        the socket has been closed) so the loop can actually be stopped.
        """
        handlers = {
            'pong': self.on_pong,
            "setup": self.on_setup,
            "registered": self.on_registered
        }
        while True:
            sock = common.connect_to(self.proxy_addr, self.proxy_port)
            if sock is None:
                # Proxy unreachable: back off before trying again.
                time.sleep(10)
                continue
            self.logger.info("connected to %s:%d" %
                             (self.proxy_addr, self.proxy_port))
            try:
                common.set_sock_buff_size(sock)
                sock.settimeout(5)
                self.registered = False
                text = ''
                send_ping = True
                while True:
                    if self.registered:
                        # Ping only on every second cycle.
                        if send_ping:
                            sock.sendall("ping;")
                        send_ping = not send_ping
                    else:
                        # sendall (not send) so the request cannot be
                        # truncated by a partial write.
                        sock.sendall("register %s;" % self.my_name)

                    readable, _, _ = select.select([sock], [], [], 10)
                    if not readable:
                        continue

                    try:
                        data = common.recv(sock, common.BUFFER_SIZE)
                    except socket.error:
                        self.logger.error(traceback.format_exc())
                        break
                    if not data:
                        self.logger.warn(
                            'read exception, disconnected by remote')
                        break

                    text += data
                    # Drain every complete action currently in the buffer;
                    # the unparsed remainder stays in ``text``.
                    while True:
                        action, params, text = common.get_action(text)
                        if action is None:
                            break
                        if action in handlers:
                            handlers[action](sock, params)
            except Exception:
                # Log and fall through to reconnect.
                self.logger.error(traceback.format_exc())
            finally:
                common.safe_close_socket(sock)
Exemplo n.º 2
0
 def set_action_group(self, action_group):
     """Attach this bar to ``action_group``, or detach when it is ``None``.

     After delegating to the parent class, the replace-forward, replace-all
     and find-toggle actions are looked up, the two replace actions are
     wired to ``on_replace_curr`` / ``on_replace_all`` through
     ``signal_holder``, and both are bound to their buttons through
     ``subscribe_proxy``.
     """
     super(ReplaceBar, self).set_action_group(action_group)

     def lookup(name):
         # Without an action group every lookup yields None.  Per the
         # original author's note, the holders return None when their
         # first argument is None, so nothing below raises in that case.
         if action_group is None:
             return None
         return action_group.get_action(name)

     forward_action = lookup(ACTION_REPLACE_FORWARD)
     all_action = lookup(ACTION_REPLACE_ALL)
     self.toggle_find = lookup(ACTION_FIND_TOGGLE)

     self.signal_1 = signal_holder(
         forward_action, "activate", self.on_replace_curr)
     self.signal_2 = signal_holder(
         all_action, "activate", self.on_replace_all)

     self.replace_forward_src = subscribe_proxy(
         forward_action, self.btn_replace_forward)
     self.replace_all_src = subscribe_proxy(
         all_action, self.btn_replace_all)
Exemplo n.º 3
0
    def set_action_group(self, action_group):
        """Bind the replace bar to ``action_group`` (``None`` unbinds it).

        Calls the parent implementation first, then resolves the
        replace-forward, replace-all and find-toggle actions.  The two
        replace actions get their "activate" signal connected via
        ``signal_holder`` and are tied to the matching buttons via
        ``subscribe_proxy``.
        """
        super(ReplaceBar, self).set_action_group(action_group)

        def resolve(name):
            # A missing group resolves every name to None; the original
            # comment states that the holders return None for a None first
            # argument, which keeps the calls below from raising.
            return None if action_group is None \
                else action_group.get_action(name)

        replace_forward_action = resolve(ACTION_REPLACE_FORWARD)
        replace_all_action = resolve(ACTION_REPLACE_ALL)
        self.toggle_find = resolve(ACTION_FIND_TOGGLE)

        self.signal_1 = signal_holder(
            replace_forward_action, "activate", self.on_replace_curr)
        self.signal_2 = signal_holder(
            replace_all_action, "activate", self.on_replace_all)

        self.replace_forward_src = subscribe_proxy(
            replace_forward_action, self.btn_replace_forward)
        self.replace_all_src = subscribe_proxy(
            replace_all_action, self.btn_replace_all)
Exemplo n.º 4
0
 def proc_cmd_connection(self, sck):
     """Read pending data from a command socket and dispatch its actions.

     Data received from ``sck`` is appended to the per-socket buffer in
     ``self.data_of_new_socks`` (keyed by ``str(sck)``).  The buffer is
     split on ``"\\r\\n"`` by ``common.get_action``; each action present in
     ``self.actions`` is invoked as ``handler(sck, params, remaining_text)``
     and unknown actions are dropped.

     An empty read is treated as a disconnect: the socket is removed from
     the new-socket bookkeeping and closed.

     :param sck: the socket to service.
     """
     sck_id = str(sck)
     data = common.recv(sck, common.BUFFER_SIZE)
     if not data:
         # Peer closed the connection.
         self.__remove_new_sock(sck)
         common.safe_close_socket(sck)
         return

     text = self.data_of_new_socks[sck_id] + data
     while True:
         action, params, text = common.get_action(text, "\r\n")
         # Store the remainder *before* dispatching, so the saved buffer is
         # already up to date when the handler runs.
         self.data_of_new_socks[sck_id] = text
         if action is None:
             break
         if action in self.actions:
             self.actions[action](sck, params, text)
Exemplo n.º 5
0
 def __proc_host_actions(self, actions):
     """Read from the host socket and dispatch every complete action.

     Data received from ``self.base_sck`` is appended to
     ``self.data_from_host``, then ``common.get_action`` is applied
     repeatedly; each action found in ``actions`` is invoked as
     ``handler(params)`` and unknown actions are skipped.  The unparsed
     remainder is stored back into ``self.data_from_host``.

     An empty read (host closed the connection) or a ``socket.error``
     clears ``self.running``.

     :param actions: mapping of action name to a one-argument callable.
     """
     try:
         data = common.recv(self.base_sck, common.BUFFER_SIZE)
         if not data:
             # The connection to the host was closed.
             self.logger.info(u'与host的连接断开')
             self.running.clear()
             return

         text = self.data_from_host + data
         while True:
             action, params, text = common.get_action(text)
             if action is None:
                 break
             if action in actions:
                 actions[action](params)
         self.data_from_host = text
     except socket.error:
         self.running.clear()
         self.logger.error(traceback.format_exc())
Exemplo n.º 6
0
 def _create_toggle_action(self, action_group):
     """Cache the find/replace actions and return the replace-toggle action.

     Every action is obtained through the module-level ``get_action``
     helper, which is handed ``action_group.get_action`` and the action
     name.  The find-toggle, replace-forward and replace-all actions are
     stored on ``self``; the replace-toggle action is returned.
     """
     def fetch(name):
         return get_action(action_group.get_action, name)

     self.toggle_find = fetch(ACTION_FIND_TOGGLE)
     self.replace_forward = fetch(ACTION_REPLACE_FORWARD)
     self.replace_all = fetch(ACTION_REPLACE_ALL)
     return fetch(ACTION_REPLACE_TOGGLE)
Exemplo n.º 7
0
 def _create_toggle_action(self, action_group):
     """Return the replace-toggle action for ``action_group``.

     The lookup goes through the module-level ``get_action`` helper, which
     receives ``action_group.get_action`` and ``ACTION_REPLACE_TOGGLE``.
     """
     return get_action(action_group.get_action, ACTION_REPLACE_TOGGLE)
Exemplo n.º 8
0
def test(block,
         args,
         d_args,
         r_args,
         d_module,
         r_module,
         enc,
         dec,
         q=None,
         rank=0):
    """Run ``block`` evaluation episodes and report success/reward totals.

    Each episode repeatedly encodes the current observation, plans a short
    action sequence (optionally seeded by ``mcts``, then extended by rolling
    ``r_module`` forward ``args.rollout - args.d`` times, with ``d_module``
    predicting the next latent unless ``args.baseline`` is set), and
    executes up to ``args.rollout`` of those actions in the environment.
    An episode ends when the environment reports ``done`` or
    ``args.max_episode_length`` steps have been taken; ``done`` is counted
    as a success.

    Args:
        block: number of episodes to evaluate.
        args: run configuration.  Fields read here: ``seed``,
            ``single_env``, ``env_name``, ``framework``, ``baseline``,
            ``max_episode_length``, ``mcts``, ``use_env``, ``rollout``,
            ``d``, ``save_figs``, ``render``, ``log_interval``, ``local``.
        d_args: configuration of ``d_module``; only ``dim`` is read.
        r_args: configuration of ``r_module``; ``dim``, ``framework`` and
            ``discrete`` are read.
        d_module: callable taking
            ``(z, z, action, (hx, cx))`` and returning
            ``(z_next, _, (hx, cx))``.
        r_module: callable taking ``(z, (hx, cx))`` and returning
            ``(value, logit, (hx, cx))``.
        enc: callable mapping an observation tensor to a latent.
        dec: callable applied to the predicted latent when
            ``args.save_figs`` is set.
        q: object with a ``put`` method that receives the result when
            ``args.local`` is false.  Must not be ``None`` in that case.
        rank: worker index, mixed into the random seed.

    Returns:
        When ``args.local`` is true, the tuple
        ``(successes, episodes, total_reward, avg_len)``.  Otherwise
        ``None``; ``(successes, episodes, total_reward)`` is put on ``q``.
    """
    import torch
    from torch.autograd import Variable

    from envs import create_env, reset_env, get_obs
    from common import get_action, log

    # Offset the seed so evaluation does not replay the training seed.
    seed = args.seed * 9823 + 194885 + rank
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    i = 1
    total_acc, total_reward = [], []
    avg_succ, avg_reward, avg_len = 0, 0, 0
    while len(total_acc) < block:
        reward_sum, succ = 0, 0
        if args.single_env and i > 1:
            # Reuse the environment created for the first episode.
            reset_env(env, args)
        else:
            env = create_env(args.env_name,
                             framework=args.framework,
                             args=args,
                             eval_flag=True)
        done = False
        step = 0

        # Should the two LSTMs share a hidden state?
        cx_r = Variable(torch.zeros(1, r_args.dim))
        hx_r = Variable(torch.zeros(1, r_args.dim))
        if not args.baseline:
            cx_d = Variable(torch.zeros(1, d_args.dim))
            hx_d = Variable(torch.zeros(1, d_args.dim))
        while step < args.max_episode_length and not done:
            # Encode state
            state = get_obs(env, r_args.framework)
            state = Variable(torch.from_numpy(state).float())
            if not args.baseline:
                z = enc(state)
                z_prime_hat = z.unsqueeze(0)
            else:
                z_prime_hat = state.unsqueeze(0)
            actions = []
            if args.mcts:
                # NOTE(review): hx_d / cx_d only exist when args.baseline is
                # false, so mcts together with baseline would fail here.
                (z_prime_hat, actions, (hx_r, cx_r), (hx_d, cx_d),
                 _, _, _) = mcts(env,
                                 z_prime_hat,
                                 r_module,
                                 d_module,
                                 enc, (hx_r, cx_r), (hx_d, cx_d),
                                 args,
                                 discrete=r_args.discrete,
                                 use_env=args.use_env)
            # Extend the plan by rolling the models forward.
            for _ in range(args.rollout - args.d):
                value, logit, (hx_r, cx_r) = r_module(
                    (z_prime_hat, (hx_r, cx_r)))
                action, _, _ = get_action(
                    logit, discrete=r_args.discrete)
                actions.append(action)
                if not args.baseline:
                    z_prime_hat, _, (hx_d, cx_d) = d_module(
                        (z_prime_hat, z_prime_hat, action, (hx_d, cx_d)))
                    if args.save_figs:
                        # Result is not used further in this function.
                        s_prime_hat = dec(z_prime_hat)

            # Execute the planned actions in the real environment.
            for action in actions[:args.rollout]:
                _, reward, done, _ = env.step(action.data.numpy())
                if args.render:
                    env.render()
                reward_sum += reward
                step += 1
                if done:
                    succ = 1
                    break
        # Incremental running means over the episodes seen so far.
        U = 1. / i
        total_acc.append(succ)
        total_reward.append(reward_sum)
        avg_succ = avg_succ * (1 - U) + succ * U
        avg_reward = avg_reward * (1 - U) + reward_sum * U
        # NOTE(review): step already counts executed env steps; confirm the
        # extra +1 is intended.
        avg_len = avg_len * (1 - U) + (step + 1) * U
        if i % args.log_interval == 0:
            # Report the running averages the message advertises (the
            # previous code printed the last episode's reward and length).
            log("Eval: {:d} episodes, avg succ {:.2f}, avg reward {:.2f}, avg length {:.2f}"
                .format(len(total_acc), avg_succ, avg_reward, avg_len))
        i += 1
    if args.local:
        return (sum(total_acc), len(total_acc), sum(total_reward), avg_len)
    q.put((sum(total_acc), len(total_acc), sum(total_reward)))
Exemplo n.º 9
0
 def _create_toggle_action(self, action_group):
     """Look up and return the replace-toggle action.

     Delegates to the module-level ``get_action`` helper, passing it
     ``action_group.get_action`` as the lookup callable together with
     ``ACTION_REPLACE_TOGGLE``.
     """
     return get_action(action_group.get_action, ACTION_REPLACE_TOGGLE)
Exemplo n.º 10
0
def mcts(env, z_hat, r_module, d_module, enc, r_state, d_state, args, discrete,
		 use_env=False):
	"""Expand a search tree of depth ``args.d`` and return the best leaf.

	A search state is the 7-tuple
	``(z_hat, trajectory, (hx_r, cx_r), (hx_d, cx_d), values, entropies,
	logprobs)``.  Starting from a single root built from the arguments,
	each depth level evaluates ``r_module`` on the parent states, proposes
	actions, produces one child per action (via ``d_module``, or by
	stepping ``env`` and re-encoding with ``enc`` when ``use_env`` is
	true), and keeps the children returned by ``prune``.

	After the last level ``r_module`` is evaluated once more on every
	surviving state, and the state whose summed values are largest is
	returned as that same 7-tuple.

	``np`` and ``prune`` are not imported here; they are expected to be
	available at module level.
	"""
	import torch
	import torch.nn.functional as F
	from torch.autograd import Variable

	from common import get_action
	from envs import get_obs

	(hx_r, cx_r) = r_state
	(hx_d, cx_d) = d_state
	# Root state: empty trajectory and empty value/entropy/log-prob lists.
	parent_states = [(z_hat, [], (hx_r, cx_r), (hx_d, cx_d), [], [], [])]
	child_states = []
	# NOTE(review): init_state is never read afterwards.
	init_state = get_obs(env, args.framework)
	for i in range(args.d):
		# NOTE(review): actions is reset once per depth level, not once per
		# parent state, so in the non-discrete branch samples drawn for an
		# earlier parent are also expanded for every later parent -- confirm
		# this is intended.
		actions = []
		best_val = None
		# NOTE(review): val / best_val are lists of r_module outputs, so the
		# ``val < best_val`` test below is a list comparison -- confirm.
		for z_hat, trajectory, (hx_r, cx_r), (hx_d, cx_d), val, entropies, \
				logprobs in parent_states:
			if best_val is None:
				best_val = val
			elif val < best_val:
				continue
			value, logit, (hx_r_prime, cx_r_prime) = r_module(
				(z_hat, (hx_r, cx_r)))
			# NOTE(review): val is mutated in place and the same list object
			# is stored in every child created below, so siblings share one
			# value list -- confirm this aliasing is intended.
			val.append(value)
			if not discrete:
				# Sample args.b candidate actions from the policy output.
				for b in range(args.b):
					action, entropy, log_prob = get_action(
						logit, discrete=False, v=args.v)
					actions.append((action, entropy, log_prob))
			else:
				prob = F.softmax(logit)
				# NOTE(review): ``b`` is only bound by the loop in the
				# non-discrete branch, so this line raises NameError unless
				# that loop ran earlier.  The result is also an array of
				# indices, while the loop below unpacks (a, e, lp) triples.
				# The discrete path looks unfinished.
				actions = np.argpartition(prob.data.numpy(), args.b)[:b]
			for a, e, lp in actions:
				if not use_env:
					# Predict the next latent with the dynamics module.
					z_prime_hat, _, (hx_d_prime, cx_d_prime) = d_module(
						(z_hat, z_hat, a, (hx_d, cx_d)))
				else:
					# Replay the trajectory in the real environment, take the
					# candidate action, encode the resulting observation, then
					# hand the saved observation back to env.reset.
					state = get_obs(env, args.framework)
					for t in trajectory:
						env.step(t.data.numpy())
					s_prime, _, _, _ = env.step(a.data.numpy())
					s_prime = Variable(torch.from_numpy(s_prime).float())
					z_prime_hat = enc(s_prime).unsqueeze(0)
					env.reset(state)
					# The dynamics module was not run; carry its state over.
					hx_d_prime, cx_d_prime = hx_d, cx_d
				child_states.append(
					(z_prime_hat, trajectory + [a], (hx_r_prime, cx_r_prime),
					(hx_d_prime, cx_d_prime), val, entropies + [e], logprobs + [lp]))
		# NOTE(review): ``b`` here is the leftover loop variable from the
		# sampling loop above (args.b - 1 after a full loop), not args.b --
		# confirm which beam width prune should receive.
		child_states = prune(child_states, b)
		parent_states = child_states
		child_states = []

	# compute value of final state in each trajectory and choose best
	best_val = sum(parent_states[0][4]).data[0,0]
	best_ind = 0
	for ind, (z, traj, hr, hd, v, _, _) in enumerate(parent_states):
		vr, _, _ = r_module((z, hr))
		# Appends the leaf value to the state's value list in place, so the
		# returned tuple includes it.
		v.append(vr)
		if sum(v).data[0,0] > best_val:
			best_ind = ind
			best_val = sum(v).data[0,0]
	return parent_states[best_ind]