import copy
import time
from collections import deque

import torch
import torch.nn as nn
import torch.optim as optim
import torch.multiprocessing as mp  # torch's drop-in multiprocessing wrapper, so tensors/models can be shared across workers

# Project-local dependencies: CNS (simulator interface), Work_info (run
# configuration, provides TimeLeg), and the hyperparameters learning_rate
# and gamma are assumed to be defined elsewhere in this repository.


class Agent(mp.Process):
    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
        mp.Process.__init__(self)
        # Network info
        self.GlobalNet = GlobalNet
        self.LocalNet = PPOModel(nub_para=2, time_leg=10)
        self.LocalNet.load_state_dict(GlobalNet.state_dict())
        self.optimizer = optim.Adam(GlobalNet.parameters(), lr=learning_rate)
        # CNS
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)
        # Shared memory
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        # Work info
        self.W = Work_info()

    # ==============================================================================================================
    # Part that sends control signals
    def send_action_append(self, pa, va):
        for _ in range(len(pa)):
            self.para.append(pa[_])
            self.val.append(va[_])

    def send_action(self, act):
        # Lists holding the parameters and values to be transmitted
        self.para = []
        self.val = []
        # Send the final parameters
        self.CNS._send_control_signal(self.para, self.val)

    # ==============================================================================================================
    # Build the input/output state
    def InitialStateSet(self):
        self.PhyPara = ['ZINST58', 'ZINST63']
        self.PhyState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.PhyPara}
        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.COMPPara}

    def MakeStateSet(self):
        # Accumulate the preprocessed values (dicts of deques)
        [self.PhyState[_].append(self.PreProcessing(_, self.CNS.mem[_]['Val'])) for _ in self.PhyPara]
        [self.COMPState[_].append(self.PreProcessing(_, self.CNS.mem[_]['Val'])) for _ in self.COMPPara]

        # Convert to tensors (deques are converted to lists first, since
        # torch.tensor cannot infer shapes from deque objects)
        self.S_Py = torch.tensor([list(self.PhyState[key]) for key in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0], self.S_Py.shape[1])
        self.S_Comp = torch.tensor([list(self.COMPState[key]) for key in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0], self.S_Comp.shape[1])

        # Latest single value per parameter
        self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara]

    def PreProcessing(self, para, val):
        if para == 'ZINST58': val = round(val / 1000, 7)  # pressurizer pressure
        if para == 'ZINST63': val = round(val / 100, 7)   # pressurizer level
        return val

    # ==============================================================================================================
    def run(self):
        while True:
            self.CNS.init_cns(initial_nub=1)
            time.sleep(1)
            # self.CNS._send_malfunction_signal(12, 100100, 15)
            # time.sleep(1)

            # Get the shared iteration counter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            print(self.CurrentIter)

            # Initial state
            done = False
            self.InitialStateSet()
            while not done:
                # Fill the time window before the first prediction
                for t in range(self.W.TimeLeg):
                    self.CNS.run_freeze_CNS()
                    self.MakeStateSet()

                for __ in range(15):
                    spy_lst, scomp_lst, a_lst, r_lst = [], [], [], []
                    # Sampling
                    for t in range(5):
                        PreVal = self.LocalNet.GetPredictActorOut(x_py=self.S_Py, x_comp=self.S_Comp)
                        PreVal = PreVal.tolist()[0]                 # (1, 2) -> (2,)
                        spy_lst.append(self.S_Py.tolist()[0])       # (1, 2, 10) -list-> (2, 10)
                        scomp_lst.append(self.S_Comp.tolist()[0])   # (1, 2, 10) -list-> (2, 10)
                        a_lst.append(PreVal)                        # (2,)

                        # Predicted next value = current value + network output
                        old_before = {0: 0, 1: 0}
                        for nub_val in range(0, 2):
                            old_before[nub_val] = self.S_ONE_Py[nub_val] + PreVal[nub_val]

                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()

                        # Reward: +0.1 if the prediction lands within +/- 0.0001 of the new value
                        r = {0: 0, 1: 0}
                        for nub_val in range(0, 2):
                            if self.S_ONE_Py[nub_val] - 0.0001 < old_before[nub_val] < self.S_ONE_Py[nub_val] + 0.0001:
                                r[nub_val] = 0.1
                            else:
                                r[nub_val] = -0.1
                        if r[0] == 0.1 and r[1] == 0.1:
                            t_r = 0.1
                        else:
                            t_r = -0.1
                        # t_r = r[0] + r[1]
                        r_lst.append(t_r)

                        print(self.CurrentIter, PreVal,
                              self.S_ONE_Py[0] - 0.0001, old_before[0], self.S_ONE_Py[0], self.S_ONE_Py[0] + 0.0001, '|',
                              self.S_ONE_Py[1] - 0.0001, old_before[1], self.S_ONE_Py[1], self.S_ONE_Py[1] + 0.0001, '|',
                              r[0], r[1], t_r)

                    # Train!
                    # n-step TD targets, computed backwards from the bootstrap value
                    spy_fin = self.S_Py      # (1, 2, 10)
                    scomp_fin = self.S_Comp  # (1, 2, 10)
                    R = 0.0 if done else self.LocalNet.GetPredictCrticOut(spy_fin, scomp_fin).item()
                    td_target_lst = []
                    for reward in r_lst[::-1]:
                        R = gamma * R + reward
                        td_target_lst.append([R])
                    td_target_lst.reverse()

                    # Build batches
                    spy_batch = torch.tensor(spy_lst, dtype=torch.float)
                    scomp_batch = torch.tensor(scomp_lst, dtype=torch.float)
                    a_batch = torch.tensor(a_lst, dtype=torch.float)  # (unused in this loss)
                    td_target = torch.tensor(td_target_lst)

                    value = self.LocalNet.GetPredictCrticOut(spy_batch, scomp_batch)
                    advantage = td_target - value

                    PreVal = self.LocalNet.GetPredictActorOut(x_py=spy_batch, x_comp=scomp_batch)
                    loss = -torch.log(PreVal) * advantage.detach() + \
                           nn.functional.smooth_l1_loss(self.LocalNet.GetPredictCrticOut(spy_batch, scomp_batch),
                                                        td_target.detach())

                    self.optimizer.zero_grad()
                    loss.mean().backward()
                    # Copy local gradients into the global network, step the shared
                    # optimizer, then pull the updated weights back (A3C-style)
                    for global_param, local_param in zip(self.GlobalNet.parameters(), self.LocalNet.parameters()):
                        global_param._grad = local_param.grad
                    self.optimizer.step()
                    self.LocalNet.load_state_dict(self.GlobalNet.state_dict())

                break
            print('Done')
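# ==============================================================================================================
# The Agent above relies on three members of PPOModel: the constructor
# signature PPOModel(nub_para, time_leg), GetPredictActorOut(x_py, x_comp),
# and GetPredictCrticOut(x_py, x_comp). The sketch below is a minimal,
# hypothetical implementation of that interface only -- the LSTM encoder,
# all layer sizes, and the added nub_out argument are assumptions, not the
# original architecture. A sigmoid on the actor head keeps outputs positive
# so that -torch.log(PreVal) in the training loss above is well defined.
class PPOModel(nn.Module):
    def __init__(self, nub_para=2, time_leg=10, nub_out=None):
        super().__init__()
        nub_out = nub_para if nub_out is None else nub_out  # nub_out is an illustrative addition
        # Physical and component windows are stacked channel-wise, then read as a sequence
        self.lstm = nn.LSTM(input_size=nub_para * 2, hidden_size=32, batch_first=True)
        self.actor = nn.Linear(32, nub_out)   # one output per controlled parameter
        self.critic = nn.Linear(32, 1)        # scalar state value

    def _encode(self, x_py, x_comp):
        # x_py, x_comp: (batch, nub_para, time_leg) -> (batch, time_leg, 2 * nub_para)
        x = torch.cat([x_py, x_comp], dim=1).permute(0, 2, 1).float()
        out, _ = self.lstm(x)
        return out[:, -1, :]                  # hidden state at the last time step

    def GetPredictActorOut(self, x_py, x_comp):
        return torch.sigmoid(self.actor(self._encode(x_py, x_comp)))

    def GetPredictCrticOut(self, x_py, x_comp):
        return self.critic(self._encode(x_py, x_comp))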
# ==============================================================================================================
# Variant: one actor-critic network per controlled parameter (NETBOX),
# each with its own optimizer and its own per-parameter reward stream.
class Agent(mp.Process):
    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
        mp.Process.__init__(self)
        # Network info
        self.GlobalNet = GlobalNet
        self.LocalNet = NETBOX()
        for _ in range(0, self.LocalNet.NubNET):
            self.LocalNet.NET[_].load_state_dict(self.GlobalNet.NET[_].state_dict())
        self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET, NET=self.GlobalNet.NET)
        # CNS
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)
        # Shared memory
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        # Work info
        self.W = Work_info()
        print(f'Make -- {self}')

    # ==============================================================================================================
    # Part that sends control signals
    def send_action_append(self, pa, va):
        for _ in range(len(pa)):
            self.para.append(pa[_])
            self.val.append(va[_])

    def send_action(self, act):
        # Lists holding the parameters and values to be transmitted
        self.para = []
        self.val = []
        # Send the final parameters
        self.CNS._send_control_signal(self.para, self.val)

    # ==============================================================================================================
    # Build the input/output state
    def InitialStateSet(self):
        self.PhyPara = ['ZINST58', 'ZINST63']
        self.PhyState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.PhyPara}
        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.COMPPara}

    def MakeStateSet(self):
        # Accumulate the preprocessed values (dicts of deques)
        [self.PhyState[_].append(self.PreProcessing(_, self.CNS.mem[_]['Val'])) for _ in self.PhyPara]
        [self.COMPState[_].append(self.PreProcessing(_, self.CNS.mem[_]['Val'])) for _ in self.COMPPara]

        # Convert to tensors (deques are converted to lists first)
        self.S_Py = torch.tensor([list(self.PhyState[key]) for key in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0], self.S_Py.shape[1])
        self.S_Comp = torch.tensor([list(self.COMPState[key]) for key in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0], self.S_Comp.shape[1])

        # Latest single value per parameter
        self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara]

    def PreProcessing(self, para, val):
        if para == 'ZINST58': val = round(val / 1000, 7)  # pressurizer pressure
        if para == 'ZINST63': val = round(val / 100, 7)   # pressurizer level
        return val

    # ==============================================================================================================
    def run(self):
        while True:
            self.CNS.init_cns(initial_nub=1)
            print('DONE initial')
            time.sleep(1)
            # self.CNS._send_malfunction_signal(12, 100100, 15)
            # time.sleep(1)

            # Get the shared iteration counter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            print(self.CurrentIter)

            # Initial state
            done = False
            self.InitialStateSet()
            while not done:
                # Fill the time window before the first prediction
                for t in range(self.W.TimeLeg):
                    self.CNS.run_freeze_CNS()
                    self.MakeStateSet()

                for __ in range(15):
                    spy_lst, scomp_lst, a_lst, r_lst = [], [], [], []
                    a_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                    r_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                    # Sampling
                    for t in range(5):
                        TimeDB = {
                            'Netout': {},   # {0: .., 1: ..}
                        }
                        for nubNet in range(self.LocalNet.NubNET):
                            NetOut = self.LocalNet.NET[nubNet].GetPredictActorOut(x_py=self.S_Py, x_comp=self.S_Comp)
                            NetOut = NetOut.tolist()[0][0]          # (1, 1) -> (1,) -> ()
                            TimeDB['Netout'][nubNet] = NetOut
                            a_dict[nubNet].append(NetOut)           # store actions per network
                        spy_lst.append(self.S_Py.tolist()[0])       # (1, 2, 10) -list-> (2, 10)
                        scomp_lst.append(self.S_Comp.tolist()[0])   # (1, 2, 10) -list-> (2, 10)

                        # Predicted next value = current value + network output
                        old_before = {0: 0, 1: 0}
                        for nubNet in range(self.LocalNet.NubNET):
                            old_before[nubNet] = self.S_ONE_Py[nubNet] + TimeDB['Netout'][nubNet]

                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()

                        # Reward: 1 if the prediction lands within +/- 0.0001 of the new value
                        r = {0: 0, 1: 0}
                        for nub_val in range(0, 2):
                            if self.S_ONE_Py[nub_val] - 0.0001 < old_before[nub_val] < self.S_ONE_Py[nub_val] + 0.0001:
                                r[nub_val] = 1
                            else:
                                r[nub_val] = 0
                        if r[0] == 1 and r[1] == 1:   # both networks correct
                            t_r = 0.1
                        else:
                            t_r = -0.1
                        # t_r = r[0] + r[1]
                        # r_lst.append(t_r)
                        for nubNet in range(self.LocalNet.NubNET):  # store rewards per network
                            r_dict[nubNet].append(r[nubNet])

                        print(self.CurrentIter, TimeDB['Netout'],
                              self.S_ONE_Py[0] - 0.0001, old_before[0], self.S_ONE_Py[0], self.S_ONE_Py[0] + 0.0001, '|',
                              self.S_ONE_Py[1] - 0.0001, old_before[1], self.S_ONE_Py[1], self.S_ONE_Py[1] + 0.0001, '|',
                              r[0], r[1], t_r)

                    # ==================================================================================================
                    # Train
                    gamma = 0.98
                    spy_fin = self.S_Py      # (1, 2, 10)
                    scomp_fin = self.S_Comp  # (1, 2, 10)
                    spy_batch = torch.tensor(spy_lst, dtype=torch.float)
                    scomp_batch = torch.tensor(scomp_lst, dtype=torch.float)

                    # Compute the advantage and update each network separately
                    for nubNet in range(self.LocalNet.NubNET):
                        # n-step TD targets, computed backwards from the bootstrap value
                        R = 0.0 if done else self.LocalNet.NET[nubNet].GetPredictCrticOut(spy_fin, scomp_fin).item()
                        td_target_lst = []
                        for reward in r_dict[nubNet][::-1]:
                            R = gamma * R + reward
                            td_target_lst.append([R])
                        td_target_lst.reverse()

                        td_target = torch.tensor(td_target_lst)
                        value = self.LocalNet.NET[nubNet].GetPredictCrticOut(spy_batch, scomp_batch)
                        advantage = td_target - value

                        PreVal = self.LocalNet.NET[nubNet].GetPredictActorOut(spy_batch, scomp_batch)
                        loss = -torch.log(PreVal) * advantage.detach() + \
                               nn.functional.smooth_l1_loss(self.LocalNet.NET[nubNet].GetPredictCrticOut(spy_batch, scomp_batch),
                                                            td_target.detach())

                        self.LocalOPT.NETOPT[nubNet].zero_grad()
                        loss.mean().backward()
                        # Copy local gradients into the global network, step the shared
                        # optimizer, then pull the updated weights back (A3C-style)
                        for global_param, local_param in zip(self.GlobalNet.NET[nubNet].parameters(),
                                                             self.LocalNet.NET[nubNet].parameters()):
                            global_param._grad = local_param.grad
                        self.LocalOPT.NETOPT[nubNet].step()
                        self.LocalNet.NET[nubNet].load_state_dict(self.GlobalNet.NET[nubNet].state_dict())

                        # TOOL.ALLP(advantage.mean())
                        print(self.CurrentIter, 'adv: ', advantage.mean().item(), 'loss: ', loss.mean().item())

                print('DONE EP')
                break
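# ==============================================================================================================
# The variant above assumes two container classes, NETBOX and NETOPTBOX.
# A minimal sketch under these assumptions: each NET entry is a single-output
# actor-critic with the same interface as the PPOModel sketched earlier (the
# Agent reads NetOut.tolist()[0][0], i.e. an actor output of shape (1, 1)),
# and each optimizer wraps the *global* network's parameters, so a worker's
# step updates the shared weights. NubNET = 2 and lr are assumed values.
class NETBOX:
    def __init__(self, nub_net=2):
        self.NubNET = nub_net
        # One single-output network per controlled parameter (ZINST58, ZINST63)
        self.NET = {i: PPOModel(nub_para=2, time_leg=10, nub_out=1) for i in range(nub_net)}

class NETOPTBOX:
    def __init__(self, NubNET, NET, lr=1e-4):
        # NET is expected to be the GlobalNet's dict of networks
        self.NETOPT = {i: optim.Adam(NET[i].parameters(), lr=lr) for i in range(NubNET)}

# Hypothetical launch code, not part of the original source: the global
# networks must live in shared memory before the workers start, and MEM
# carries the shared iteration counter read in Agent.run().
if __name__ == '__main__':
    GlobalNet = NETBOX()
    for i in range(GlobalNet.NubNET):
        GlobalNet.NET[i].share_memory()
    MEM = mp.Manager().dict({'Iter': 0})
    workers = [Agent(GlobalNet, MEM,
                     '192.168.0.1', 7000 + i,    # CNS IPs/ports are placeholders
                     '192.168.0.2', 7100 + i) for i in range(2)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()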