示例#1
0
    def prepare_miniBatch(self, transitions_raw):
        """Convert a list of raw transitions into batched training tensors.

        Returns (state, action, reward, nonFinalNextState, nonFinalMask,
        finalNextState, finalMask); action is long of shape (batch, 1) and
        reward is float32 of shape (batch, 1). State batching is delegated
        entirely to self.stateProcessor, which this agent requires.
        """
        # Transpose: list of transitions -> transition of per-field tuples.
        batch = ExtendedTransition(*zip(*transitions_raw))

        action = torch.tensor(batch.action, device=self.device,
                              dtype=torch.long).unsqueeze(-1)  # (batch, 1)
        reward = torch.tensor(batch.reward, device=self.device,
                              dtype=torch.float32).unsqueeze(-1)  # (batch, 1)

        # For some envs the raw observation needs processing before it can be
        # fed to the network; without a processor there is no tensor path here.
        if self.stateProcessor is None:
            raise NotImplementedError

        state, _ = self.stateProcessor(batch.state, device=self.device)
        (nonFinalNextState, nonFinalMask,
         finalNextState, finalMask) = self.stateProcessor(batch.next_state,
                                                          device=self.device,
                                                          done=batch.done)

        return state, action, reward, nonFinalNextState, nonFinalMask, finalNextState, finalMask
 def store_experience(self, state, action, nextState, reward, done, info):
     """Record a one-step transition; optionally also a hindsight relabel."""
     self.memory.push(
         ExtendedTransition(state, action, nextState, reward, done))
     # Hindsight experience replay is opt-in via self.hindSightER.
     if self.hindSightER:
         self.process_hindSightExperience(state, action, nextState, reward,
                                          done, info)
 def process_hindSightExperience(self, state, action, nextState, reward,
                                 done, info):
     """Ask the env for a hindsight-relabelled transition and store it.

     Only attempted on non-terminal steps and only every
     hindSightERFreq-th global step; the env declines by returning a
     None state.
     """
     if done or self.globalStepCount % self.hindSightERFreq != 0:
         return
     (stateNew, actionNew, nextStateNew, rewardNew,
      doneNew) = self.env.getHindSightExperience(state, action, nextState,
                                                 done, info)
     if stateNew is not None:
         self.memory.push(
             ExtendedTransition(stateNew, actionNew, nextStateNew,
                                rewardNew, doneNew))
示例#4
0
    def store_experience(self, state, action, nextState, reward, done, info):
        """Store one transition in the replay memory of its stage.

        The experience is optionally passed through
        self.experienceProcessor first, then filed under
        self.memories[state['stageID']].
        """
        if self.experienceProcessor is not None:
            # caution: using multiple step forward return can increase variance
            state, action, nextState, reward = self.experienceProcessor(
                state, action, nextState, reward, done, info)

        # NOTE(review): this tags the caller's `done` dict in place with the
        # current global step id — callers observe the mutation.
        done['id'] = self.globalStepCount
        stage = state['stageID']
        self.memories[stage].push(
            ExtendedTransition(state, action, nextState, reward, done))
示例#5
0
    def prepare_minibatch(self, transitions_raw):
        """Build batched training tensors from raw sampled transitions.

        Args:
            transitions_raw: sequence of ExtendedTransition tuples sampled
                from replay memory.

        Returns:
            (state, nonFinalMask, nonFinalNextState, finalMask,
            finalNextState, action, reward); action is long of shape
            (batch, 1), reward is float32 of shape (batch, 1). The masks are
            uint8 tensors; the next-state outputs are stacked tensors when
            non-empty, otherwise empty lists.
        """
        transitions = ExtendedTransition(*zip(*transitions_raw))
        # BUGFIX: use the real batch size, not self.trainBatchSize — the two
        # differ for partial batches, and indexing past the batch end raises.
        batchSize = len(transitions_raw)

        action = torch.tensor(transitions.action,
                              device=self.device,
                              dtype=torch.long).unsqueeze(-1)  # (batch, 1)
        reward = torch.tensor(transitions.reward,
                              device=self.device,
                              dtype=torch.float32).unsqueeze(-1)  # (batch, 1)

        # for some env, the output state requires further processing before
        # feeding to neural network
        if self.stateProcessor is not None:
            state, _ = self.stateProcessor(transitions.state, self.device)
            nonFinalNextState, nonFinalMask, finalNextState, finalMask = \
                self.stateProcessor(transitions.next_state, self.device,
                                    transitions.done)
        else:
            state = torch.tensor(transitions.state,
                                 device=self.device,
                                 dtype=torch.float32)
            nextState = torch.tensor(transitions.next_state,
                                     device=self.device,
                                     dtype=torch.float32)
            # final mask marks transitions whose episode is done
            finalMask = torch.tensor(transitions.done,
                                     device=self.device,
                                     dtype=torch.uint8)
            nonFinalMask = 1 - finalMask

            finalNextState = [
                nextState[i] for i in range(batchSize) if finalMask[i]
            ]
            nonFinalNextState = [
                nextState[i] for i in range(batchSize) if nonFinalMask[i]
            ]

            # Stack into tensors only when non-empty; torch.stack on an
            # empty list would raise, so empty lists pass through unchanged.
            if len(nonFinalNextState):
                nonFinalNextState = torch.stack(nonFinalNextState)
            if len(finalNextState):
                finalNextState = torch.stack(finalNextState)

        return state, nonFinalMask, nonFinalNextState, finalMask, finalNextState, action, reward
示例#6
0
 def store_experience(self, state, action, nextState, reward, done, info):
     """Push a single one-step transition into the replay memory."""
     self.memory.push(
         ExtendedTransition(state, action, nextState, reward, done))