Example No. 1
    def train_controller(self):
        """
            Train controller to find better structure.
        """
        print("*" * 35, "training controller", "*" * 35)
        model = self.controller
        model.train()

        baseline = None
        adv_history = []
        entropy_history = []
        reward_history = []

        hidden = self.controller.init_hidden(self.args.batch_size)
        total_loss = 0
        for step in range(self.args.controller_max_step):
            # sample graphnas
            structure_list, log_probs, entropies = self.controller.sample(
                with_details=True)

            # calculate reward
            np_entropies = entropies.data.cpu().numpy()
            results = self.get_reward(structure_list, np_entropies, hidden)
            torch.cuda.empty_cache()

            if results:  # has reward
                rewards, hidden = results
            else:
                continue  # a CUDA error occurred; drop this structure and move on to the next iteration

            # discount
            if 1 > self.args.discount > 0:
                rewards = discount(rewards, self.args.discount)

            reward_history.extend(rewards)
            entropy_history.extend(np_entropies)

            # moving average baseline
            if baseline is None:
                baseline = rewards
            else:
                decay = self.args.ema_baseline_decay
                baseline = decay * baseline + (1 - decay) * rewards

            adv = rewards - baseline
            history.append(adv)  # `history` is the module-level list read by scale()
            adv = scale(adv, scale_value=0.5)
            adv_history.extend(adv)

            adv = utils.get_variable(adv, self.cuda, requires_grad=False)
            # policy loss
            loss = -log_probs * adv
            if self.args.entropy_mode == 'regularizer':
                loss -= self.args.entropy_coeff * entropies

            loss = loss.sum()  # or loss.mean()

            # update
            self.controller_optim.zero_grad()
            loss.backward()

            if self.args.controller_grad_clip > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               self.args.controller_grad_clip)
            self.controller_optim.step()

            total_loss += utils.to_item(loss.data)

            self.controller_step += 1
            torch.cuda.empty_cache()

        print("*" * 35, "training controller over", "*" * 35)
Example No. 2
    def train_controller(self):
        """
            Train controller to find better structure.
        """
        print("*" * 35, "training controller", "*" * 35)
        model = self.controller
        model.train()

        baseline = None
        adv_history = []
        entropy_history = []
        reward_history = []

        hidden = self.controller.init_hidden(self.args.batch_size)
        total_loss = 0
        for step in range(self.args.controller_max_step):
            # time taken by one controller training step
            once_controller_train_start_time = time.time()

            # sample graphnas
            structure_list, log_probs, entropies = self.controller.sample(with_details=True)

            # calculate reward
            np_entropies = entropies.data.cpu().numpy()
            results = self.get_reward(structure_list, np_entropies, hidden)
            torch.cuda.empty_cache()

            if results:  # has reward
                rewards, hidden, acc = results
            else:
                continue

            # discount
            if 1 > self.args.discount > 0:
                rewards = discount(rewards, self.args.discount)

            reward_history.extend(rewards)
            entropy_history.extend(np_entropies)

            # moving average baseline
            if baseline is None:
                baseline = rewards
            else:
                decay = self.args.ema_baseline_decay
                baseline = decay * baseline + (1 - decay) * rewards

            adv = rewards - baseline
            history.append(adv)  # `history` is the module-level list read by scale()
            adv = scale(adv, scale_value=0.5)
            adv_history.extend(adv)

            adv = utils.get_variable(adv, self.cuda, requires_grad=False)
            # policy loss
            loss = -log_probs * adv
            if self.args.entropy_mode == 'regularizer':
                loss -= self.args.entropy_coeff * entropies

            loss = loss.sum()  # or loss.mean()

            # update
            self.controller_optim.zero_grad()
            loss.backward()

            if self.args.controller_grad_clip > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               self.args.controller_grad_clip)
            self.controller_optim.step()

            total_loss += utils.to_item(loss.data)

            self.controller_step += 1
            torch.cuda.empty_cache()
            once_controller_train_end_time = time.time()
            print("the", step, "epcoh controller train time: ",
                  once_controller_train_end_time-once_controller_train_start_time, "s")

            if step == 0:
                self.RL_train_time.append(once_controller_train_start_time)
                self.RL_train_time.append(once_controller_train_end_time)
                self.RL_train_acc.append(acc)
            else:
                self.RL_train_time.append(once_controller_train_end_time)
                self.RL_train_acc.append(acc)
        print("all RL train time list: ", self.RL_train_time)
        print("all RL train acc list: ", self.RL_train_acc)
        print("*" * 35, "training controller over", "*" * 35)
        experiment_data_save("controler_train.txt", self.RL_train_time, self.RL_train_acc)
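
The `experiment_data_save` call at the end is not shown in this snippet; a minimal sketch of what such a helper might look like, assuming it simply dumps the two lists to a text file (the signature and behaviour are assumptions, not the project's actual implementation):

def experiment_data_save(file_name, time_list, acc_list):
    # Hypothetical helper: persist the per-step timestamps and accuracies
    # so the controller's training curve can be inspected later.
    with open(file_name, "w") as f:
        f.write("time: " + ",".join(str(t) for t in time_list) + "\n")
        f.write("acc: " + ",".join(str(a) for a in acc_list) + "\n")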
Example No. 3
    def train_controller(self):
        """
            Train controller to find better structure.
        """
        print("*" * 35, "training controller", "*" * 35)
        model = self.controller
        model.train()

        baseline = None
        adv_history = []
        entropy_history = []
        reward_history = []

        hidden = self.controller.init_hidden(self.args.batch_size)  # e.g. shape (2, 64, 100)
        total_loss = 0
        for step in range(self.args.controller_max_step):  # 100
            start_time = time()
            # sample graphnas
            structure_list, log_probs, entropies = self.controller.sample(
                with_details=True)  # num_sample = 1
            #[['gat', 'max', 'tanh', 1, 128, 'cos', 'sum', 'tanh', 4, 16]]
            #tensor([-1.9461, -1.3946, -2.0890, -1.7695, -1.9348, -1.9490, -1.3980, -2.0886,-1.7938, -1.9401], device='cuda:0', grad_fn=<CatBackward>)
            #tensor([1.9459, 1.3863, 2.0794, 1.7917, 1.9458, 1.9459, 1.3862, 2.0794, 1.7917,1.9458], device='cuda:0', grad_fn=<CatBackward>)
            # calculate reward
            np_entropies = entropies.data.cpu().numpy()
            results = self.get_reward(structure_list, np_entropies, hidden)
            torch.cuda.empty_cache()

            if results:  # has reward
                rewards, hidden = results
            else:
                continue  # a CUDA error occurred; drop this structure and move on to the next iteration

            # discount
            if 1 > self.args.discount > 0:
                rewards = discount(rewards, self.args.discount)

            reward_history.extend(rewards)
            entropy_history.extend(np_entropies)

            # moving average baseline
            if baseline is None:
                baseline = rewards
            else:
                decay = self.args.ema_baseline_decay
                baseline = decay * baseline + (1 - decay) * rewards

            adv = rewards - baseline
            history.append(adv)  # `history` is the module-level list read by scale()
            adv = scale(adv, scale_value=0.5)
            adv_history.extend(adv)

            adv = utils.get_variable(adv, self.cuda, requires_grad=False)
            # policy loss
            loss = -log_probs * adv
            if self.args.entropy_mode == 'regularizer':
                loss -= self.args.entropy_coeff * entropies

            loss = loss.sum()  # or loss.mean()

            # update
            self.controller_optim.zero_grad()
            loss.backward()

            if self.args.controller_grad_clip > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               self.args.controller_grad_clip)
            self.controller_optim.step()

            total_loss += utils.to_item(loss.data)

            self.controller_step += 1
            torch.cuda.empty_cache()
            elapsed = (time() - start_time)
            print('[%d/%d] time %.2f ' % (
                step + 1,
                self.args.controller_max_step,
                elapsed,
            ))
        print("*" * 35, "training controller over", "*" * 35)
Example No. 4
    def train_controller(self):
        """
            Train controller to find better structure.
        """
        print("*" * 35, "training controller", "*" * 35)

        model = self.controller

        # switch the PyTorch-defined controller model into training mode
        # adv_history, entropy_history and reward_history are re-initialised on every training call
        model.train()

        baseline = None
        adv_history = []
        entropy_history = []
        reward_history = []

        # initialise the LSTM hidden state (h0, c0) for batch_size samples, all zeros
        hidden = self.controller.init_hidden(self.args.batch_size)
        # self.args.batch_size = 64

        # initialise the controller LSTM's total loss
        total_loss = 0

        for step in range(self.args.controller_max_step):
            # self.args.controller_max_step = 100
            # the controller runs 100 steps per training call; each step samples one GNN structure

            # sample graphnas
            structure_list, log_probs, entropies = self.controller.sample(
                with_details=True)
            # structure_list = ['gat', 'sum', 'relu', 2, 8, 'linear', 'mlp', 'tanh', 2, 4]  ---> the sampled GNN structure
            # log_probs = tensor([-1.9461, -1.3936, -2.0807, -1.7964, -1.9570, -1.9413, -1.3704, -2.0878,
            #         -1.7907, -1.9185], device='cuda:0', grad_fn=<CatBackward>)  ---> log-probability (self-information) of the operator chosen at each LSTM step
            # entropies = tensor([1.9459, 1.3863, 2.0794, 1.7917, 1.9458, 1.9459, 1.3862, 2.0794, 1.7917,
            #         1.9458], device='cuda:0', grad_fn=<CatBackward>)  ---> entropy of the LSTM output distribution at each step

            # calculate reward
            np_entropies = entropies.data.cpu().numpy()

            results = self.get_reward(structure_list, np_entropies, hidden)

            # results = (rewards, hidden); hidden is returned unchanged

            torch.cuda.empty_cache()

            if results:  # has reward
                rewards, hidden = results
            else:
                continue  # a CUDA error occurred; drop this structure and move on to the next iteration

            # discount
            # recompute the rewards list with an exponential discount filter
            if 1 > self.args.discount > 0:
                # self.args.discount = 1
                rewards = discount(rewards, self.args.discount)
                # module-level helper, quoted for reference:
                # def discount(x, amount):
                #     return scipy.signal.lfilter([1], [1, -amount], x[::-1], axis=0)[::-1]
                # (x[::-1] reverses the sequence x)

            reward_history.extend(rewards)

            entropy_history.extend(np_entropies)

            # moving average baseline
            if baseline is None:
                baseline = rewards
            else:
                decay = self.args.ema_baseline_decay
                baseline = decay * baseline + (1 - decay) * rewards

            adv = rewards - baseline

            history.append(adv)  # `history` is the module-level list read by scale()

            adv = scale(adv, scale_value=0.5)
            """
 def scale(value, last_k=10, scale_value=1):
    '''
    scale value into [-scale_value, scale_value], according last_k history
    '''
    max_reward = np.max(history[-last_k:])
    if max_reward == 0:
        return value
    return scale_value / max_reward * value
            
            """

            adv_history.extend(adv)

            adv = utils.get_variable(adv, self.cuda, requires_grad=False)

            # policy loss
            loss = -log_probs * adv

            if self.args.entropy_mode == 'regularizer':
                loss -= self.args.entropy_coeff * entropies

            loss = loss.sum()  # or loss.mean()

            # update
            self.controller_optim.zero_grad()
            loss.backward()

            if self.args.controller_grad_clip > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               self.args.controller_grad_clip)
            self.controller_optim.step()

            total_loss += utils.to_item(loss.data)

            self.controller_step += 1
            torch.cuda.empty_cache()

        print("*" * 35, "training controller over", "*" * 35)