def turn_end(self, status):
    """Finish the current turn: optionally push a training sample, then reset per-turn state.

    When training is enabled and a move was made this turn, a transition is
    pushed to the learner: terminal (win/draw/lose reward, cont=0) if the game
    ended (``status.actor is None``), otherwise a step transition whose next
    state is the board as seen from the opponent's side.

    Parameters:
        status: game status object; ``status.actor is None`` signals game over,
            in which case ``status.winner`` determines the reward.
    """
    if self.train_enable:
        if self.last_choice is not None:
            train_dict = copy.copy(self.train_dict)
            if status.actor is None:
                # Terminal transition: reward by outcome, no continuation.
                if status.winner == self.side:
                    reward = REWARD_WIN
                elif status.winner is None:
                    reward = REWARD_DRAW
                else:
                    reward = REWARD_LOSE
                train_dict['state_1'] = train_dict['state_0']
                train_dict['cont'] = 0
                train_dict['reward_1'] = reward
            else:
                # Non-terminal transition: next state from the opponent's view.
                train_dict['state_1'] = dlplayer.conv_status(status, tttl.OPP[self.side])
                train_dict['cont'] = 1
                train_dict['reward_1'] = REWARD_STEP
            self.dl.push_train_dict(train_dict)
            self.dl.do_train()
    # Per-turn bookkeeping is cleared regardless of training mode.
    self.legit_mask = None
    self.train_dict = None
    self.last_choice = None
def turn_end(self, status):
    """Finish the current turn: optionally push a training sample, then reset per-turn state.

    If training is on and a move was recorded this turn, push one transition:
    a terminal one (outcome reward, cont=0) when ``status.actor is None``,
    otherwise a step transition whose next state is converted from the
    opponent's perspective.

    Parameters:
        status: game status object; ``status.actor is None`` signals game over,
            in which case ``status.winner`` determines the reward.
    """
    if self.train_enable and self.last_choice is not None:
        train_dict = copy.copy(self.train_dict)
        if status.actor is None:
            # Game over: map the winner to a terminal reward.
            if status.winner == self.side:
                reward = REWARD_WIN
            else:
                reward = REWARD_DRAW if status.winner is None else REWARD_LOSE
            train_dict['state_1'] = train_dict['state_0']
            train_dict['cont'] = 0
            train_dict['reward_1'] = reward
        else:
            # Game continues: next state is the board from the opponent's side.
            train_dict['state_1'] = dlplayer.conv_status(status, tttl.OPP[self.side])
            train_dict['cont'] = 1
            train_dict['reward_1'] = REWARD_STEP
        self.dl.push_train_dict(train_dict)
        self.dl.do_train()
    # Reset per-turn bookkeeping whether or not we trained.
    self.legit_mask = None
    self.train_dict = None
    self.last_choice = None
def input(self, status, retry):
    """Choose an action for the given game status.

    On a fresh turn (``retry`` false) the legality mask and per-turn training
    state are reset.  On a retry the previously attempted cell is masked out
    and, when training is enabled, the rejected move is punished with
    REWARD_BAD before a new choice is made.

    Parameters:
        status: game status object converted via ``dlplayer.conv_status``.
        retry: truthy when the previous choice was rejected as illegal.

    Returns:
        The action from ``dlplayer.ACTION_MAP`` for the network's choice.
    """
    if not retry:
        # Fresh turn: all nine cells start out legal.
        self.legit_mask = [1.0] * 9
        self.train_dict = None
        self.last_choice = None
    else:
        # The last choice was rejected; forbid that cell from now on.
        self.legit_mask[self.last_choice] = 0.0
        if self.train_enable and self.train_dict is not None:
            # Punish the illegal move as a terminal-style bad transition.
            train_dict = copy.copy(self.train_dict)
            train_dict['state_1'] = train_dict['state_0']
            train_dict['cont'] = 0
            train_dict['reward_1'] = REWARD_BAD
            self.dl.push_train_dict(train_dict)
            self.dl.do_train()
    new_status = dlplayer.conv_status(status, self.side)
    self.train_dict, _ = self.dl.cal_choice(new_status, self.legit_mask, self.train_enable)
    choice = self.train_dict['choice_0']
    # Log via the module logger for consistency with the other input() variants.
    logging.debug("GKPMPCLI choice: " + str(choice))
    self.last_choice = choice
    return dlplayer.ACTION_MAP[choice]
def input(self, status):
    """Choose an action for the given game status.

    The legality mask persists between calls (it is cleared elsewhere, e.g. by
    ``turn_end``); on the first call of a turn it is initialized to all-legal.

    Parameters:
        status: game status object converted via ``dlplayer.conv_status``.

    Returns:
        The action from ``dlplayer.ACTION_MAP`` for the network's choice.
    """
    if self.legit_mask is None:
        # First call of the turn: every cell is initially legal.
        self.legit_mask = [1.0] * 9
    new_status = dlplayer.conv_status(status, self.side)
    self.train_dict, _ = self.dl.cal_choice(new_status, self.legit_mask, self.train_enable)
    choice = self.train_dict['choice_0']
    logging.debug("GKPMPCLI choice: " + str(choice))
    self.last_choice = choice
    return dlplayer.ACTION_MAP[choice]
def input(self, status):
    """Choose an action for the given game status.

    The legality mask persists between calls (it is cleared elsewhere, e.g. by
    ``turn_end``); when absent it is (re)initialized to all-legal.

    Parameters:
        status: game status object converted via ``dlplayer.conv_status``.

    Returns:
        The action from ``dlplayer.ACTION_MAP`` for the network's choice.
    """
    if self.legit_mask is None:
        # No mask yet: start with all nine cells legal.
        self.legit_mask = [1.0] * 9
    new_status = dlplayer.conv_status(status, self.side)
    self.train_dict, _ = self.dl.cal_choice(new_status, self.legit_mask, self.train_enable)
    choice = self.train_dict['choice_0']
    logging.debug("GKPMPCLI choice: " + str(choice))
    self.last_choice = choice
    return dlplayer.ACTION_MAP[choice]
def input(self, status, retry):
    """Choose an action for the given game status.

    A fresh turn (``retry`` false) resets the legality mask and per-turn
    training state.  A retry masks out the previously attempted cell and,
    if training is enabled, pushes a REWARD_BAD transition for the rejected
    move before selecting again.

    Parameters:
        status: game status object converted via ``dlplayer.conv_status``.
        retry: truthy when the previous choice was rejected as illegal.

    Returns:
        The action from ``dlplayer.ACTION_MAP`` for the network's choice.
    """
    if not retry:
        # New turn: all nine cells start out legal.
        self.legit_mask = [1.0] * 9
        self.train_dict = None
        self.last_choice = None
    else:
        # Previous choice was illegal; remove it from the legal set.
        self.legit_mask[self.last_choice] = 0.0
        if self.train_enable and self.train_dict is not None:
            # Punish the illegal move as a terminal-style bad transition.
            train_dict = copy.copy(self.train_dict)
            train_dict['state_1'] = train_dict['state_0']
            train_dict['cont'] = 0
            train_dict['reward_1'] = REWARD_BAD
            self.dl.push_train_dict(train_dict)
            self.dl.do_train()
    new_status = dlplayer.conv_status(status, self.side)
    self.train_dict, _ = self.dl.cal_choice(new_status, self.legit_mask, self.train_enable)
    choice = self.train_dict['choice_0']
    # Log via the module logger for consistency with the other input() variants.
    logging.debug("GKPMPCLI choice: " + str(choice))
    self.last_choice = choice
    return dlplayer.ACTION_MAP[choice]