Example #1
0
def parse():
    operation = sys.argv[1].split("=")
    left = operation[0]
    right = operation[1]
    degree = -1
    left_degree = utils.get_degree(left)
    if left_degree == -2:
        print(
            'Syntax error: something is wrong with your input, please double-check it.'
        )
        return -1
    right_degree = utils.get_degree(right)
    if right_degree == -2:
        print(
            'Syntax error: something is wrong with your input, please double-check it.'
        )
        return -1
    if left_degree > right_degree:
        degree = left_degree
    else:
        degree = right_degree
    if degree <= -1:
        print(
            'Syntax error: something is wrong with your input, please double-check it.'
        )
        return -1
    if degree == 0:
        solver.resolve_zero_degree(left, right)
    elif degree == 1:
        solver.resolve_first_degree(left, right)
    elif degree == 2:
        solver.resolve_second_degree(left, right)
    elif degree > 2:
        print(
            'The polynomial degree is strictly greater than 2. I can\'t solve.')
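For context, a minimal sketch of what utils.get_degree might look like for this parser is shown below. The term format (terms such as 4*X^2 joined by + and -) and the -2 syntax-error code are assumptions inferred from how parse() uses the helper, not the original implementation.

import re

def get_degree(side):
    """Hypothetical helper: highest exponent found on one side of the
    equation, or -2 when a term cannot be parsed (syntax error)."""
    degree = -1
    for term in re.split(r'[+-]', side.replace(' ', '').upper()):
        if not term:
            continue  # skip the empty piece produced by a leading sign
        match = re.fullmatch(r'(?:\d+(?:\.\d+)?\*?)?(?:X(?:\^(\d+))?)?', term)
        if match is None:
            return -2  # unparsable term -> syntax error
        if 'X' in term:
            degree = max(degree, int(match.group(1) or 1))
        else:
            degree = max(degree, 0)
    return degree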
Example #2
0
    def __init__(self, in_features, out_features, data, bias=True):
        super(MaskedGCNConv, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        # Linear transform applied to node features.
        self.fc = nn.Linear(in_features, out_features, bias=bias)
        # Per-node degrees and the dense adjacency matrix, moved to the target device.
        self.degree = get_degree(data.edge_list).float().to(device)
        self.dense_adj = data.adj.to_dense().to(device)
        # Learnable parameter with one entry per input feature, initialised in reset_parameters().
        self.sigma = Parameter(torch.Tensor(in_features))
        self.reset_parameters()
Example #3
0
def laplacian_sharpening(data):
    # Node degrees plus one for the self-loop contribution.
    deg = get_degree(data.edge_list) + 1
    source, target = data.edge_list
    # Every edge gets weight -1; self-loop edges get -1 + 3 = 2.
    weight = -torch.ones(data.edge_list.size(1))
    weight += 3 * (source == target)
    # Symmetric normalisation D^{-1/2} W D^{-1/2}, guarding against isolated nodes.
    deg_inv_sqrt = torch.pow(deg.to(torch.float), -0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0.0
    v = deg_inv_sqrt[source] * weight * deg_inv_sqrt[target]
    lap_sharp = torch.sparse.FloatTensor(data.edge_list, v)
    return lap_sharp
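Examples #2-#4 assume a graph-side get_degree that turns a [2, num_edges] edge-list tensor into per-node degrees. A minimal sketch of such a helper is shown below; the tensor layout and dtype are assumptions inferred from the call sites, not the project's implementation.

import torch

def get_degree(edge_list):
    """Hypothetical helper: per-node degree from a [2, num_edges] edge list.
    For an undirected graph the edge list is expected to contain both
    directions of every edge."""
    source = edge_list[0]
    num_nodes = int(edge_list.max()) + 1
    # Count how many edges leave each node.
    return torch.bincount(source, minlength=num_nodes)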
Example #4
0
    def __init__(self, data, nhid=32, latent_dim=16):
        super(VGAEGRA, self).__init__(data, nhid, latent_dim)
        alpha = 0.95
        A = data.adjmat
        D = get_degree(data.edge_list)
        Dinv = 1 / D.float()
        # Propagation matrix: gra = alpha * (I - alpha * A * D^{-1})^{-1} * A
        self.gra = alpha * torch.matmul(
            torch.inverse(
                torch.eye(data.num_nodes) -
                alpha * torch.matmul(A, torch.diag(Dinv))), A)
        # Rescale so the entries sum to num_nodes^2.
        norm = self.gra.sum()
        self.gra = self.gra / norm * (data.num_nodes**2)
Example #5
0
    def obs2input2(self):
        nagent = len(self.obs) - np.sum(self.obs, axis=0, dtype=np.int32)[5]
        nenemy = len(self.obs) - nagent
        # Per-agent feature vector: 5 own features + 7 features for each of the 9 other units = 68.
        input = np.zeros([nagent, 68])
        n = 0
        for i in range(len(self.obs)):
            if self.obs[i][5] == 0:
                input[n][0] = self.obs[i][1]
                input[n][1] = self.obs[i][2]
                input[n][2] = self.obs[i][3]
                input[n][3] = self.obs[i][4]
                input[n][4] = self.obs[i][5]
                k = 5
                for j in range(len(self.obs)):
                    if j != i:
                        dis = utils.get_distance(
                            self.obs[i][6], -self.obs[i][7], self.obs[j][6],
                            -self.obs[j][7]) / DISTANCE_FACTOR - 1
                        degree = utils.get_degree(
                            self.obs[i][6], -self.obs[i][7], self.obs[j][6],
                            -self.obs[j][7]) / 180
                        input[n][k] = degree
                        k += 1
                        input[n][k] = dis
                        k += 1
                        input[n][k] = self.obs[j][1]
                        k += 1
                        input[n][k] = self.obs[j][2]
                        k += 1
                        input[n][k] = self.obs[j][3]
                        k += 1
                        input[n][k] = self.obs[j][4]
                        k += 1
                        input[n][k] = self.obs[j][5]
                        k += 1
                n += 1

        return input
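The observation-encoding examples (#5-#10) rely on utils.get_distance and a four-argument utils.get_degree that returns the bearing between two map positions in degrees; DISTANCE_FACTOR is a project constant used only for normalisation. A plausible sketch of the two helpers, with their exact conventions assumed rather than taken from the project, is:

import math

def get_distance(x1, y1, x2, y2):
    """Hypothetical helper: Euclidean distance between two map positions."""
    return math.hypot(x2 - x1, y2 - y1)

def get_degree(x1, y1, x2, y2):
    """Hypothetical helper: bearing from (x1, y1) to (x2, y2) in degrees,
    in the range (-180, 180], so dividing by 180 yields a value in (-1, 1]."""
    return math.degrees(math.atan2(y2 - y1, x2 - x1))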
Example #6
0
    def obs2input(self):
        n = 0
        for i in range(len(self.obs)):
            if self.obs[i][5] == 0:
                # initialize the input to the model: 5 + 7*9 = 68
                input = np.zeros(68)
                enemy_table = -np.ones(5)
                input[0] = self.obs[i][1]
                input[1] = self.obs[i][2]
                input[2] = self.obs[i][3]
                input[3] = self.obs[i][4]
                input[4] = self.obs[i][5]
                k = 5
                ind_enemy = 0
                for j in range(len(self.obs)):
                    if j != i:
                        dis = utils.get_distance(
                            self.obs[i][6], -self.obs[i][7], self.obs[j][6],
                            -self.obs[j][7]) / DISTANCE_FACTOR - 1
                        degree = utils.get_degree(
                            self.obs[i][6], -self.obs[i][7], self.obs[j][6],
                            -self.obs[j][7]) / 180
                        input[k] = degree
                        k += 1
                        input[k] = dis
                        k += 1
                        input[k] = self.obs[j][1]
                        k += 1
                        input[k] = self.obs[j][2]
                        k += 1
                        input[k] = self.obs[j][3]
                        k += 1
                        input[k] = self.obs[j][4]
                        k += 1
                        input[k] = self.obs[j][5]
                        k += 1

                # Only the first friendly unit's encoding is returned.
                return input
Example #7
0
def model_eval(args, model, env, random=True, vis=None):
	if vis is not None:
		vis, window_id, fps = vis
		frame_dur = 1.0 / fps
		last_time = time.time()

	rewards, start_time = 0, time.time()
	
	obs = env.reset()
	t = 0
	while True:
		nagent = len(obs) - np.sum(obs, axis=0, dtype=np.int32)[5]
		nenemy = len(obs) - nagent

		action = np.zeros([nagent, env.action_space.shape[0]])
		'''
		if nenemy == 0:
			break
		'''

		n = 0
		for i in range(len(obs)):
			if obs[i][5] == 0:
				input = np.zeros(68)
				enemy_table = -np.ones(5)
				input[0] = obs[i][1]
				input[1] = obs[i][2]
				input[2] = obs[i][3]
				input[3] = obs[i][4]
				input[4] = obs[i][5]
				k = 5
				ind_enemy = 0
				for j in range(len(obs)):
					if j != i:
						dis = utils.get_distance(obs[i][6], -obs[i][7], obs[j][6], -obs[j][7]) / DISTANCE_FACTOR - 1
						degree = utils.get_degree(obs[i][6], -obs[i][7], obs[j][6], -obs[j][7]) / 180
						input[k] = degree
						k += 1
						input[k] = dis
						k += 1
						for l in range(5):
							input[k] = obs[j][l+1]
							k += 1
						if obs[j][5] == 1:
							enemy_table[ind_enemy] = obs[j][0]
							ind_enemy += 1
				pout, _ = model.pi_and_v(Variable(torch.from_numpy(input).float().unsqueeze(0), volatile=True))
				#pout = model.pi_and_v(Variable(torch.from_numpy(input).float().unsqueeze(0), volatile=True))
				command_id = pout.action_indices[0] if random else pout.most_probable_actions[0]
				action[n][0] = obs[i][0]
				if command_id < 5:
					action[n][1] = 1
					action[n][4] = enemy_table[command_id]
				else:
					action[n][1] = -1
					if command_id < 10:
						action[n][2] = (float(command_id) - 5)/4
					else:
						action[n][2] = (float(command_id) - 13)/4
					action[n][3] = 1
				n += 1
		obs, reward, done, _ = env.step(action)
		#print(reward)
		rewards += reward
		if args.save_path is not None:
			with open(os.path.join(args.save_path, 'rewards_eval'), 'a+') as f:
				f.write('{}: {}\n'.format(t, rewards))
		#reward += reward
		if vis is not None and time.time() > last_time + frame_dur:
			pass
		if done:
			break
		t += 1
		if t > 501:
			break

	return EvalResult(rewards, time.time()-start_time)
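Both evaluation routines return an EvalResult built as EvalResult(rewards, time.time() - start_time). A minimal compatible definition (the field names are assumptions) would be:

from collections import namedtuple

# Hypothetical container matching the two positional arguments used above.
EvalResult = namedtuple('EvalResult', ['reward', 'elapsed_time'])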
Example #8
0
def model_eval_commnet(args, model, env, random=True, vis=None):

	if vis is not None:
		vis, window_id, fps = vis
		frame_dur = 1.0 / fps
		last_time = time.time()

	rewards, start_time = 0, time.time()
	
	obs = env.reset()
	t = 0
	nagent_pre = len(obs) - np.sum(obs, axis=0, dtype=np.int32)[5]
	while True:
		nagent = len(obs) - np.sum(obs, axis=0, dtype=np.int32)[5]
		if nagent_pre != nagent:
			#print('resetting the model {} {}'.format(nagent, nagent_pre))
			model.reset_state()
		nagent_pre = nagent
		nenemy = len(obs) - nagent
		#print('nagent: {} | env,episodes_step: {}'.format(nagent, env.episode_steps))
		'''
		if nagent == 0 and env.episode_steps == 0:
			obs = env.reset()
			continue
		elif nagent == 0 and env.episode_steps != 0:
			break
		'''

		action = np.zeros([nagent, env.action_space.shape[0]])
		input = np.zeros([nagent, 68])
		#"enemy table"
		enemy_table = - np.ones(5)
		n = 0
		ind_enemy = 0
		for i in range(len(obs)):
			if obs[i][5] == 0:
				action[n][0] = obs[i][0]
				input[n][0] = obs[i][1]
				input[n][1] = obs[i][2]
				input[n][2] = obs[i][3]
				input[n][3] = obs[i][4]
				input[n][4] = obs[i][5]
				k = 5
				for j in range(len(obs)):
					if j != i:
						dis = utils.get_distance(obs[i][6], -obs[i][7], obs[j][6], -obs[j][7]) / DISTANCE_FACTOR - 1
						degree = utils.get_degree(obs[i][6], -obs[i][7], obs[j][6], -obs[j][7]) / 180
						input[n][k] = degree
						k += 1
						input[n][k] = dis
						k += 1
						for l in range(5):
							input[n][k] = obs[j][l+1]
							k += 1

				n += 1
			else:
				enemy_table[ind_enemy] = obs[i][0]
				ind_enemy += 1
		#print(input)
		if len(input) != 0:
			#print(input)
			pout, _ = model.pi_and_v(Variable(torch.from_numpy(input).float()))
		for i in range(nagent):
			command_id = pout.action_indices[i] if random else pout.most_probable_actions[i]
			if command_id < 5:
				action[i][1] = 1
				action[i][4] = enemy_table[command_id]
			else:
				action[i][1] = -1
				if command_id < 10:
					action[i][2] = (float(command_id) - 5)/4
				else:
					action[i][2] = (float(command_id) - 13)/4
				action[i][3] = 1
		obs, reward, done, _ = env.step(action)
		#print('Reward: {} | done: {}'.format(reward, done))
		rewards += reward
		if args.save_path is not None:
			with open(os.path.join(args.save_path, 'rewards_eval'), 'a+') as f:
				f.write('{}: {}\n'.format(t, rewards))
		#reward += reward
		if vis is not None and time.time() > last_time + frame_dur:
			pass
		if done:
			break
		t += 1
		if t > 501:
			break

	return EvalResult(rewards, time.time()-start_time)
Example #9
0
    def act(self, args):

        self.model.load_state_dict(self.shared_model.state_dict())
        self.model.train()
        if self.done:
            self.reset_state()

        log_probs, entropies, rewards, values, actions = [], [], [], [], []
        for _ in range(self.t_max):

            nagent = len(self.obs) - np.sum(self.obs, axis=0,
                                            dtype=np.int32)[5]
            nenemy = len(self.obs) - nagent
            #print('Length of obs {}, number of agents {}, number of enemies {}'.format(len(self.obs), nagent, nenemy))

            action = np.zeros([nagent, self.env.action_space.shape[0]])

            n = 0
            for i in range(len(self.obs)):
                if self.obs[i][5] == 0:
                    # initialize the input for the (independent) model: 5 + 7*9 = 68
                    input = np.zeros(68)
                    enemy_table = -np.ones(5)
                    input[0] = self.obs[i][1]
                    input[1] = self.obs[i][2]
                    input[2] = self.obs[i][3]
                    input[3] = self.obs[i][4]
                    input[4] = self.obs[i][5]
                    k = 5
                    ind_enemy = 0
                    for j in range(len(self.obs)):
                        if j != i:
                            dis = utils.get_distance(
                                self.obs[i][6], -self.obs[i][7], self.obs[j]
                                [6], -self.obs[j][7]) / DISTANCE_FACTOR - 1
                            degree = utils.get_degree(
                                self.obs[i][6], -self.obs[i][7],
                                self.obs[j][6], -self.obs[j][7]) / 180
                            input[k] = degree
                            k += 1
                            input[k] = dis
                            k += 1
                            input[k] = self.obs[j][1]
                            k += 1
                            input[k] = self.obs[j][2]
                            k += 1
                            input[k] = self.obs[j][3]
                            k += 1
                            input[k] = self.obs[j][4]
                            k += 1
                            input[k] = self.obs[j][5]
                            k += 1
                            if self.obs[j][5] == 1:
                                enemy_table[ind_enemy] = self.obs[j][0]
                                ind_enemy += 1

                    pout, vout = self.model.pi_and_v(
                        Variable(torch.from_numpy(input).float().unsqueeze(0)))

                    action[n][0] = self.obs[i][0]
                    command_id = pout.action_indices[0]
                    #self.sum.add_scalar_value('command', command_id)
                    with open(os.path.join(args.save_path, 'command_id'),
                              'a+') as f:
                        f.write('{}\n'.format(command_id))
                    if command_id < 5:
                        action[n][1] = 1
                        action[n][4] = enemy_table[command_id]
                    else:
                        action[n][1] = -1
                        if command_id < 10:
                            action[n][2] = (float(command_id) - 5) / 4
                        else:
                            action[n][2] = (float(command_id) - 13) / 4
                        action[n][3] = 1

                    n += 1
                    log_probs.append(pout.sampled_actions_log_probs)
                    entropies.append(pout.entropy)
                    values.append(vout)
            self.obs, reward, done, _ = self.env.step(action)

            if action is not None:
                n = len(action)
            else:
                n = 0
            for i in range(n):
                rewards.append(reward)

            if done:
                self.done = done
                break
            if self.env.episode_steps == self.env.max_episode_steps:
                self.done = True
                break
        R = 0
        input_one_agent = self.obs2input()
        if not self.done and self.obs is not None and input_one_agent is not None:
            _, vout = self.model.pi_and_v(
                Variable(
                    torch.from_numpy(input_one_agent).float().unsqueeze(0)))
            R = float(vout.data.numpy())
        else:
            self.model.reset_state()

        t = len(rewards)
        if t == 0:
            return t

        pi_loss, v_loss = 0, 0
        for i in reversed(range(t)):
            R = self.gamma * R + rewards[i]
            v = values[i]

            advantage = R - float(v.data.numpy()[0, 0])
            #print('R:{}  v:{}'.format(R, v))
            # Accumulate gradients of policy
            log_prob = log_probs[i]
            entropy = entropies[i]
            # Log probability is increased proportionally to advantage
            pi_loss -= log_prob * advantage
            # Entropy is maximized
            pi_loss -= self.beta * entropy
            # Accumulate gradients of value function
            v_loss += (v - R).pow(2).div_(2)
            #self.sum.add_scalar_value('r', rewards[i])
            #self.sum.add_scalar_value('v', v.data[0,0])
            #self.sum.add_scalar_value('R', R)
            #self.sum.add_scalar_value('Advantage', advantage)
        if self.pi_loss_coef != 1.0:
            pi_loss *= self.pi_loss_coef

        if self.v_loss_coef != 1.0:
            v_loss *= self.v_loss_coef
        # Normalize the loss of sequences truncated by terminal states
        if self.keep_loss_scale_same and t < self.t_max:
            factor = self.t_max / t
            pi_loss *= factor
            v_loss *= factor

        total_loss = pi_loss + v_loss
        #print('total_loss:{}'.format(total_loss))

        # Compute gradients using thread-specific model
        self.optimizer.zero_grad()
        total_loss.backward()
        torch.nn.utils.clip_grad_norm(self.model.parameters(), 40)
        # Copy the gradients to the globally shared model
        ensure_shared_grads(self.model, self.shared_model, self.process_idx)
        self.optimizer.step()

        self.model.unchain_backward()
        #self.sum.add_scalar_value('total_loss', total_loss.data[0,0])

        return t
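The gradient hand-off at the end of act() goes through ensure_shared_grads. The sketch below follows the pattern used by common PyTorch A3C implementations (copy the worker's gradients onto the shared model unless another worker already did); it is an assumption about this helper, and process_idx is kept only to match the call signature.

def ensure_shared_grads(model, shared_model, process_idx=None):
    """Hypothetical helper: hand the local gradients over to the shared model."""
    for param, shared_param in zip(model.parameters(),
                                   shared_model.parameters()):
        if shared_param.grad is not None:
            # The shared gradients were already set this step.
            return
        shared_param._grad = param.grad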
Example #10
0
    def act(self, obs):
        nagent = len(obs) - np.sum(obs, axis=0, dtype=np.int32)[5]
        nenemy = len(obs) - nagent

        action = np.zeros([nagent, self.action_space.shape[0]])
        if nenemy == 0:
            return None

        n = 0
        for i in range(len(obs)):
            if obs[i][5] == 0:
                # initialize the input to the model: 5 + 7*9 = 68
                input = np.zeros(68)
                enemy_table = -np.ones(5)
                input[0] = obs[i][1]
                input[1] = obs[i][2]
                input[2] = obs[i][3]
                input[3] = obs[i][4]
                input[4] = obs[i][5]
                k = 5
                ind_enemy = 0
                for j in range(len(obs)):
                    if j != i:
                        dis = utils.get_distance(
                            obs[i][6], -obs[i][7], obs[j][6],
                            -obs[j][7]) / DISTANCE_FACTOR - 1
                        degree = utils.get_degree(obs[i][6], -obs[i][7],
                                                  obs[j][6], -obs[j][7]) / 180
                        input[k] = degree
                        k += 1
                        input[k] = dis
                        k += 1
                        # hp[0,100]
                        input[k] = obs[j][1]
                        k += 1
                        # shield[0,100]
                        input[k] = obs[j][2]
                        k += 1
                        # cooldown[0,1]
                        input[k] = obs[j][3]
                        k += 1
                        # ground range[0,1]
                        input[k] = obs[j][4]
                        k += 1
                        # is-enemy flag: 0 for own unit, 1 for enemy
                        input[k] = obs[j][5]
                        k += 1
                        if obs[j][5] == 1:
                            enemy_table[ind_enemy] = obs[j][0]
                            ind_enemy += 1
                act = select_action(input)

                action[n][0] = obs[i][0]
                #action[n][1] = 1
                #action[n][2] = 0
                action[n][3] = 1
                action[n][4] = -1
                if act[0, 0] == 0:
                    action[n][1] = 1
                    action[n][4] = enemy_table[0]
                elif act[0, 0] == 1:
                    action[n][1] = 1
                    action[n][4] = enemy_table[1]
                elif act[0, 0] == 2:
                    action[n][1] = 1
                    action[n][4] = enemy_table[2]
                elif act[0, 0] == 3:
                    action[n][1] = 1
                    action[n][4] = enemy_table[3]
                elif act[0, 0] == 4:
                    action[n][1] = 1
                    action[n][4] = enemy_table[4]
                else:
                    action[n][1] = -1
                    if act[0, 0] == 5:
                        action[n][2] = 0
                    elif act[0, 0] == 6:
                        action[n][2] = 0.25
                    elif act[0, 0] == 7:
                        action[n][2] = 0.5
                    elif act[0, 0] == 8:
                        action[n][2] = 0.75
                    elif act[0, 0] == 9:
                        action[n][2] = 1
                    elif act[0, 0] == 10:
                        action[n][2] = -0.75
                    elif act[0, 0] == 11:
                        action[n][2] = -0.5
                    elif act[0, 0] == 12:
                        action[n][2] = -0.25

                n = n + 1
        #for i in range(n-1):
        #	model.rewards.append(0)

        return action