# Shared imports for the __init__ variants below; ConvNet, QNet, DQNet, ACNet,
# NeuralNet, Policy, RND, RandomNetwork, HER, and logger are project-local
# classes assumed to be importable from the surrounding repo.
import copy
from collections import deque
from copy import deepcopy as dc

import torch


# DQN agent over image observations: conv Q-network, target network, HER, and
# a bounded replay buffer.
def __init__(self, env, gamma, buffer_size, ddqn):
    self.env = env
    Sdim, Adim = env.get_dims()
    self.model = ConvNet(Sdim[0], Sdim[0], 3, Adim).cuda()
    self.target_model = copy.deepcopy(self.model).cuda()
    self.her = HER()
    self.gamma = gamma
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.0001)
    self.batch_size = 16
    self.epsilon = 0.1
    self.buffer_size = buffer_size
    self.step_counter = 0
    # epsilon-greedy schedule: anneal eps from epsi_high to epsi_low
    self.epsi_high = 0.9
    self.epsi_low = 0.1
    self.steps = 0
    self.count = 0
    self.decay = 2000
    self.eps = self.epsi_high
    self.update_target_step = 3000  # hard target-network sync interval
    self.log = logger()
    self.log.add_log('tot_return')
    self.log.add_log('avg_loss')
    self.log.add_log('final_dist')
    self.log.add_log('buffer')
    self.image_mean = 0  # running statistics for observation normalization
    self.image_std = 0
    self.ddqn = ddqn  # toggle double-DQN bootstrapping
    self.replay_buffer = deque(maxlen=buffer_size)
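
# A minimal sketch (assumption, not the source's code) of what the ddqn flag
# above typically toggles: double DQN lets the online net pick the next action
# while the target net evaluates it, instead of maxing over the target net.
# The method name `next_state_value` is hypothetical.
def next_state_value(self, s_next):
    with torch.no_grad():
        if self.ddqn:
            # Double DQN: decouple action selection from action evaluation.
            a_star = self.model(s_next).argmax(dim=1, keepdim=True)
            return self.target_model(s_next).gather(1, a_star).squeeze(1)
        # Vanilla DQN: the target net both selects and evaluates.
        return self.target_model(s_next).max(dim=1).values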

# DQN agent with HER for an N-dimensional environment; CUDA optional.
def __init__(self, env, gamma, buffer_size, cuda_flag):
    self.env = env
    self.N = env.N
    self.cuda_flag = cuda_flag
    if self.cuda_flag:
        self.model = Policy(2 * self.N, self.N).cuda()
        self.target_model = copy.deepcopy(self.model).cuda()
    else:
        self.model = Policy(2 * self.N, self.N)
        self.target_model = copy.deepcopy(self.model)
    self.her = HER(self.N)
    self.gamma = gamma
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.0005)
    self.batch_size = 64
    self.epsilon = 0.1
    self.buffer_size = buffer_size
    self.step_counter = 0
    # epsilon-greedy schedule: anneal eps from epsi_high to epsi_low
    self.epsi_high = 0.9
    self.epsi_low = 0.05
    self.steps = 0
    self.count = 0
    self.decay = 200
    self.eps = self.epsi_high
    self.update_target_step = 1000  # hard target-network sync interval
    self.log = logger()
    self.log.add_log('tot_return')
    self.log.add_log('avg_loss')
    self.log.add_log('final_dist')
    self.replay_buffer = deque(maxlen=buffer_size)
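
# A minimal sketch (assumption): epsi_high, epsi_low, decay, and steps above
# are the usual ingredients of an exponentially annealed epsilon-greedy
# schedule; a typical per-step update looks like this.
import math

def update_epsilon(self):
    # Decay eps from epsi_high toward epsi_low as the step count grows.
    self.steps += 1
    self.eps = self.epsi_low + (self.epsi_high - self.epsi_low) * math.exp(-1.0 * self.steps / self.decay)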

# DQN agent with random network distillation (RND) for intrinsic exploration
# reward; logs both the real (extrinsic) and combined returns.
def __init__(self, env, gamma, buffer_size):
    self.env = env
    acts = env.action_space
    obs = env.observation_space
    self.model = QNet(obs.shape[0], acts.n, 64)
    self.target_model = copy.deepcopy(self.model)
    self.rnd = RND(obs.shape[0], 64, 124)  # random network distillation module
    self.gamma = gamma  # discount factor
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)  # optimizes only the DQN parameters
    self.batch_size = 64
    self.epsilon = 0.1
    self.buffer_size = buffer_size
    self.step_counter = 0
    self.epsi_high = 0.9
    self.epsi_low = 0.05
    self.steps = 0
    self.count = 0
    self.decay = 200
    self.eps = self.epsi_high
    self.update_target_step = 500
    self.log = logger()
    self.log.add_log('real_return')
    self.log.add_log('combined_return')
    self.log.add_log('avg_loss')
    self.replay_buffer = deque(maxlen=buffer_size)
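
# A minimal sketch (assumption, not the source's code): in RND the intrinsic
# reward is the predictor's error against a frozen random target network, and
# 'combined_return' presumably logs extrinsic plus intrinsic reward. The
# `target` and `predictor` attribute names on the RND module are hypothetical.
def intrinsic_reward(rnd, state):
    with torch.no_grad():
        target_feat = rnd.target(state)  # frozen random network
    pred_feat = rnd.predictor(state)     # trained to imitate the target
    return (pred_feat - target_feat).pow(2).mean()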

# DQN agent over graphs: k clusters of m nodes each, multi-head DQNet.
def __init__(self, problem, gamma=1.0, eps=0.1, lr=1e-4, cuda_flag=True):
    self.problem = problem
    self.G = problem.g  # the graph
    self.k = problem.k  # num of clusters
    self.m = problem.m  # num of nodes per cluster
    self.ajr = problem.adjacent_reserve  # degree of node in graph
    self.hidden_dim = problem.hidden_dim  # hidden dimension for node representation
    self.n = self.k * self.m  # num of nodes
    self.eps = eps  # constant for epsilon-greedy exploration
    if cuda_flag:
        self.model = DQNet(k=self.k, m=self.m, ajr=self.ajr, num_head=4,
                           hidden_dim=self.hidden_dim).cuda()
    else:
        self.model = DQNet(k=self.k, m=self.m, ajr=self.ajr, num_head=4,
                           hidden_dim=self.hidden_dim)
    self.gamma = gamma  # reward decay constant
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
    self.experience_replay_buffer = []
    self.replay_buffer_max_size = 1000
    self.cuda = cuda_flag
    self.log = logger()
    self.log.add_log('tot_return')
    self.log.add_log('TD_error')
    self.log.add_log('entropy')

# DQN agent with a cascade replay buffer (one sub-buffer per episode step) and
# optional priority sampling; see the sampling sketch after this function.
def __init__(self, problem,
             action_type='swap',
             gamma=1.0, eps=0.1, lr=1e-4,
             action_dropout=1.0,
             sample_batch_episode=False,
             replay_buffer_max_size=5000,
             epi_len=50, new_epi_batch_size=10,
             edge_info='adj_weight',
             readout='mlp',
             explore_method='epsilon_greedy',
             priority_sampling=False,
             clip_target=False):
    self.problem = problem
    self.action_type = action_type
    self.G = problem.g  # the graph
    self.k = problem.k  # num of clusters
    self.ajr = problem.adjacent_reserve  # degree of node in graph
    self.hidden_dim = problem.hidden_dim  # hidden dimension for node representation
    self.n = problem.N  # num of nodes
    self.eps = eps  # constant for epsilon-greedy exploration in dqn
    self.edge_info = edge_info
    self.explore_method = explore_method
    self.clip_target = clip_target
    self.model = DQNet(k=self.k, n=self.n, num_head=2, hidden_dim=self.hidden_dim,
                       edge_info=self.edge_info, readout=readout).cuda()
    self.model_target = dc(self.model)
    self.gamma = gamma  # reward decay constant
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
    self.sample_batch_episode = sample_batch_episode
    self.experience_replay_buffer = []
    self.replay_buffer_max_size = replay_buffer_max_size
    self.buf_epi_len = epi_len
    self.new_epi_batch_size = new_epi_batch_size
    self.cascade_replay_buffer = [[] for _ in range(self.buf_epi_len)]
    self.cascade_replay_buffer_weight = torch.zeros((self.buf_epi_len, self.new_epi_batch_size))
    # one sub-buffer per episode step, each capped at max_size // epi_len, e.g. [100] * 50
    self.stage_max_sizes = [self.replay_buffer_max_size // self.buf_epi_len] * self.buf_epi_len
    self.buffer_actual_size = sum(self.stage_max_sizes)
    self.priority_sampling = priority_sampling
    self.cascade_buffer_kcut_value = torch.zeros((self.buf_epi_len, self.new_epi_batch_size))
    self.action_dropout = action_dropout
    self.log = logger()
    self.Q_err = 0  # accumulated Q error
    self.log.add_log('tot_return')
    self.log.add_log('Q_error')
    self.log.add_log('entropy')
    self.log.add_log('R_signal')
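
# A minimal sketch (assumption, not the source's code): with priority_sampling
# enabled, transitions in stage t of the cascade buffer could be drawn in
# proportion to the stored weights (e.g. absolute TD errors), falling back to
# uniform sampling otherwise. `sample_stage` and `sample_size` are hypothetical.
def sample_stage(self, t, sample_size):
    n = min(len(self.cascade_replay_buffer[t]), self.cascade_replay_buffer_weight.shape[1])
    if self.priority_sampling:
        weights = self.cascade_replay_buffer_weight[t, :n]
        idx = torch.multinomial(weights / weights.sum(), sample_size, replacement=True)
    else:
        idx = torch.randint(n, (sample_size,))
    return [self.cascade_replay_buffer[t][int(i)] for i in idx]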

# Actor-critic agent over graphs; CUDA optional.
def __init__(self, problem, cuda_flag):
    self.problem = problem
    ndim = self.problem.get_graph_dims()
    if cuda_flag:
        self.model = ACNet(ndim, 264, 1).cuda()
    else:
        self.model = ACNet(ndim, 264, 1)
    self.gamma = 0.98
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.0001)
    self.batch_size = 32
    self.num_episodes = 1
    self.cuda = cuda_flag
    self.log = logger()
    self.log.add_log('tot_return')
    self.log.add_log('TD_error')
    self.log.add_log('entropy')

# Actor-critic agent over graphs with num_grp output groups.
def __init__(self, problem, cuda_flag):
    self.problem = problem
    ndim = self.problem.get_graph_dims()
    self.num_grp = self.problem.num_grp
    self.n = self.problem.N
    if cuda_flag:
        self.model = ACNet(ndim, 128 * 2, self.num_grp, self.n).cuda()
    else:
        self.model = ACNet(ndim, 128 * 2, self.num_grp, self.n)
    self.gamma = 0.98
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.0003)
    self.batch_size = 32
    self.num_episodes = 1
    self.cuda = cuda_flag
    self.log = logger()
    self.log.add_log('tot_return')
    self.log.add_log('TD_error')
    self.log.add_log('entropy')

# DQN agent with RND intrinsic rewards scaled by scale_intrinsic.
def __init__(self, env, gamma, timer, buffer_size, scale_intrinsic):
    self.env = env
    actions = env.action_space
    observations = env.observation_space
    self.timer = timer
    self.gamma = gamma
    self.buffer_size = buffer_size
    self.scale_intrinsic = scale_intrinsic  # weight of the intrinsic (RND) reward
    self.model = NeuralNet(observations.shape[0], actions.n, 64)
    self.target_model = copy.deepcopy(self.model)
    self.rnd = RandomNetwork(observations.shape[0], 64, 124)
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
    self.batch_size = 64
    # epsilon-greedy schedule: anneal eps from epsi_high to epsi_low
    self.epsi_high = 0.9
    self.epsi_low = 0.05
    self.step_counter = 0
    self.steps = 0
    self.count = 0
    self.decay = 200
    self.eps = self.epsi_high
    self.update_target_step = 300  # hard target-network sync interval
    self.log = logger()
    self.log.add_log('real_return')
    self.log.add_log('combined_return')
    self.log.add_log('avg_loss')
    self.replay_buffer = deque(maxlen=buffer_size)
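
# A minimal sketch (assumption): step_counter and update_target_step above
# suggest a periodic hard copy of the online network into the target network.
# The method name `maybe_sync_target` is hypothetical.
def maybe_sync_target(self):
    self.step_counter += 1
    if self.step_counter % self.update_target_step == 0:
        self.target_model.load_state_dict(self.model.state_dict())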

# Graph DQN agent with two replay buffers and optional target clipping and
# reward calibration.
def __init__(self, problem, action_type='swap', gamma=1.0, eps=0.1, lr=1e-4,
             replay_buffer_max_size=10, replay_buffer_max_size2=5000,
             extended_h=False, time_aware=False, use_x=False,
             edge_info='adj_weight', readout='mlp',
             clip_target=False, use_calib_reward=False, cuda_flag=True):
    self.problem = problem
    self.action_type = action_type
    self.G = problem.g  # the graph
    self.k = problem.k  # num of clusters
    self.m = problem.m  # num of nodes per cluster
    self.ajr = problem.adjacent_reserve  # degree of node in graph
    self.hidden_dim = problem.hidden_dim  # hidden dimension for node representation
    self.n = self.k * self.m  # num of nodes
    self.eps = eps  # constant for epsilon-greedy exploration in dqn
    self.extended_h = extended_h
    self.use_x = use_x
    self.edge_info = edge_info
    self.clip_target = clip_target
    self.use_calib_reward = use_calib_reward
    if cuda_flag:
        self.model = DQNet(k=self.k, m=self.m, ajr=self.ajr, num_head=4,
                           hidden_dim=self.hidden_dim, extended_h=self.extended_h,
                           use_x=self.use_x, edge_info=self.edge_info,
                           readout=readout).cuda()
    else:
        self.model = DQNet(k=self.k, m=self.m, ajr=self.ajr, num_head=4,
                           hidden_dim=self.hidden_dim, extended_h=self.extended_h,
                           use_x=self.use_x, edge_info=self.edge_info,
                           readout=readout)
    self.model_target = dc(self.model)
    self.gamma = gamma  # reward decay constant
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
    self.experience_replay_buffer = []
    self.experience_replay_buffer2 = []
    self.buffer_episode_offset = [0]
    self.buffer_indices = []
    self.replay_buffer_max_size = replay_buffer_max_size
    self.replay_buffer_max_size2 = replay_buffer_max_size2
    self.time_aware = time_aware
    self.cuda = cuda_flag
    self.log = logger()
    self.Q_err = 0  # accumulated Q error
    self.log.add_log('tot_return')
    self.log.add_log('Q_error')
    self.log.add_log('entropy')
    self.log.add_log('R_signal')
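
# A minimal sketch (assumption, not the source's code): clip_target above
# plausibly bounds the bootstrapped term of the TD target for stability; the
# clamp range below is purely illustrative, as is the `td_target` name.
def td_target(self, reward, q_next_max, done, clip_min=-1.0, clip_max=1.0):
    bootstrap = self.gamma * q_next_max * (1.0 - done)
    if self.clip_target:
        bootstrap = bootstrap.clamp(clip_min, clip_max)
    return reward + bootstrap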