Example #1
    def get_val_result(self, validation_graph):
        """Run a greedy rollout on each validation graph and return the average cover size."""
        objective_vals = []
        for g in validation_graph:
            env = MVC_environement(g)
            Xv, graph = env.reset_env()
            graph = torch.unsqueeze(graph, 0)
            Xv = Xv.clone().to(self.device)
            graph = graph.to(self.device)
            done = False
            self.non_selected = list(np.arange(env.num_nodes))
            self.selected = []
            while not done:
                Xv = Xv.to(self.device)
                # Greedy action from the trained Q-network; no exploration during validation.
                action = self.take_action(graph, Xv, is_validation=True)

                Xv_next, reward, done = env.take_action(action)
                Xv = Xv_next
            objective_vals.append(len(self.selected))
        return sum(objective_vals) / len(objective_vals)
    def train(self, g, num_eps=20):
        """Run num_eps epsilon-greedy episodes on graph g, storing N_STEP-step transitions."""
        N_STEP = 2
        fitted_q_exp = namedtuple("fitted_exp",
                                  ['graph', 'Xv', 'action', 'reward'])
        experience = namedtuple(
            "experience",
            ['graph', 'Xv', 'action', 'reward', 'next_Xv', 'is_done'])
        EPS_START = 1.00
        EPS_END = 0.05
        EPS_DECAY = 500
        steps_done = 0
        for e in range(num_eps):

            env = MVC_environement(g)
            Xv, graph = env.reset_env()
            Xv = Xv.clone()
            graph = torch.unsqueeze(graph, 0)
            done = False
            non_selected = list(np.arange(env.num_nodes))
            selected = []
            N = 0
            fitted_experience_list = []
            reward_list = []
            self.agent.new_epsiode()
            while not done:
                eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(
                    -1. * steps_done / EPS_DECAY)
                if np.random.uniform() > eps_threshold:
                    val = self.agent(graph, Xv)[0]
                    val[selected] = -float('inf')
                    action = int(torch.argmax(val).item())
                else:
                    action = int(np.random.choice(non_selected))
                Xv_next, reward, done = env.take_action(action)
                Xv_next = Xv_next.clone()
                fit_ex = fitted_q_exp(graph, Xv, action, reward)
                fitted_experience_list.append(fit_ex)
                non_selected.remove(action)
                selected.append(action)
                N += 1
                reward_list.append(reward)

                if N >= N_STEP:
                    n_reward = sum(reward_list)
                    n_prev_ex = fitted_experience_list[0]
                    n_graph = n_prev_ex.graph
                    n_Xv = n_prev_ex.Xv
                    n_action = n_prev_ex.action
                    ex = experience(n_graph, n_Xv, torch.tensor([n_action]),
                                    torch.tensor([n_reward]), Xv_next, done)
                    self.agent.store_transition(ex)
                    fitted_experience_list.pop(0)
                    reward_list.pop(0)

                Xv = Xv_next
                steps_done += 1
                self.agent.train(batch_size=8, fitted_Q=True)
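
The exploration schedule in train() anneals the random-action probability exponentially, from EPS_START down to EPS_END, as steps_done grows. A small self-contained sketch (constants copied from the example above) shows how the threshold decays:

    import math

    EPS_START, EPS_END, EPS_DECAY = 1.00, 0.05, 500

    def eps_threshold(steps_done):
        # Probability of picking a random (exploratory) action at this step count.
        return EPS_END + (EPS_START - EPS_END) * math.exp(-1.0 * steps_done / EPS_DECAY)

    for step in (0, 250, 500, 1000, 2000):
        print(step, round(eps_threshold(step), 3))
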
Example #3
    def train_with_graph(self, g):
        '''
        Run one training episode on g (a networkx graph), storing N_STEP-step
        transitions and training the agent after every step.
        '''
        N_STEP = 2
        env = MVC_environement(g)
        Xv, graph = env.reset_env()
        Xv = Xv.clone()
        graph = torch.unsqueeze(graph, 0)
        done = False
        fitted_experience_list = []
        reward_list = []
        self.non_selected = list(np.arange(env.num_nodes))
        self.selected = []
        self.N = 0
        while not done:
            action = self.take_action(graph, Xv)
            Xv_next, reward, done = env.take_action(action)
            Xv_next = Xv_next.clone()
            fit_ex = fitted_q_exp(graph, Xv, action, reward)
            fitted_experience_list.append(fit_ex)
            self.N += 1
            reward_list.append(reward)
            if self.N >= N_STEP:
                n_reward = sum(reward_list)
                n_prev_ex = fitted_experience_list[0]
                n_graph = n_prev_ex.graph
                n_Xv = n_prev_ex.Xv
                n_action = n_prev_ex.action

                ex = experience(n_graph, n_Xv, torch.tensor([n_action]),
                                torch.tensor([n_reward]), Xv_next, done)

                self.store_transition(ex)
                fitted_experience_list.pop(0)
                reward_list.pop(0)
            Xv = Xv_next
            self.train()
        self.episode_done += 1
        if self.episode_done > 0 and self.episode_done % 8 == 0:
            self.update_target_network()
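
train_with_graph references the fitted_q_exp and experience namedtuples without defining them; Example #1 constructs namedtuples with exactly these fields inside train(), so a plausible module-level reconstruction (an assumption, since the original definitions are not shown here) would be:

    from collections import namedtuple

    # Short-horizon record kept until N_STEP rewards have been accumulated.
    fitted_q_exp = namedtuple("fitted_exp", ['graph', 'Xv', 'action', 'reward'])

    # Full n-step transition handed to store_transition().
    experience = namedtuple(
        "experience",
        ['graph', 'Xv', 'action', 'reward', 'next_Xv', 'is_done'])
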
Example #4
    def get_val_result(self, validation_graph, run_sparse=False):
        """Run greedy rollouts on the validation graph(s) and return the average cover size."""
        if type(validation_graph) is not list:
            validation_graph = [validation_graph]

        objective_vals = []
        for g in validation_graph:
            env = MVC_environement(g)
            Xv, graph = env.reset_env()
            if run_sparse:
                assert len(validation_graph) == 1
                graph = to_sparse_tensor(graph)
            else:
                graph = torch.unsqueeze(graph, 0)
            Xv = Xv.clone().to(self.device)
            graph = graph.to(self.device)
            done = False
            self.non_selected = list(np.arange(env.num_nodes))
            self.selected = []
            while not done:
                Xv = Xv.to(self.device)
                # Greedy action from the trained Q-network; no exploration during validation.
                action = self.take_action(graph, Xv, is_validation=True)

                Xv_next, reward, done = env.take_action(action)
                Xv = Xv_next
            objective_vals.append(len(self.selected))
        return sum(objective_vals) / len(objective_vals)
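
The run_sparse branch relies on a to_sparse_tensor helper that is not included in the snippet. A minimal sketch of such a helper for a dense 2-D adjacency matrix (an assumption about the original utility, not its actual code):

    import torch

    def to_sparse_tensor(dense):
        # Keep only the non-zero adjacency entries as a COO sparse tensor,
        # which saves memory when validating on a single large graph.
        indices = dense.nonzero(as_tuple=False).t()
        values = dense[indices[0], indices[1]]
        return torch.sparse_coo_tensor(indices, values, dense.size())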