class Actor(nn.Module):
    """MLP policy with two single-unit output heads.

    The first head is squashed by a sigmoid (range [0, 1]) and the second by
    tanh (range [-1, 1]); both are concatenated into a two-column action.
    The actor also owns an ``OUNoise`` process used for exploration.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=3e-4):
        """Build the layers and the exploration-noise process.

        Args:
            input_size: Width of the state vector fed to the first layer.
            hidden_size: Width of both hidden layers.
            output_size: First argument handed to ``OUNoise``. The network
                itself always emits two columns, so the noise must broadcast
                against a ``(batch, 2)`` tensor.
            learning_rate: Accepted for interface compatibility; unused here.
        """
        super(Actor, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3_1 = nn.Linear(hidden_size, 1)
        self.linear3_2 = nn.Linear(hidden_size, 1)
        self.apply(xavier_init)
        self.noise = OUNoise(output_size, seed=1, mu=0.0, theta=0.15,
                             max_sigma=0.3, min_sigma=0.05, decay_period=10000)
        self.noise.reset()

    def forward(self, state):
        """Map a batch of states to a ``(batch, 2)`` action tensor.

        Args:
            state: Torch tensor whose last dimension is ``input_size``.

        Returns:
            Tensor with column 0 in [0, 1] (sigmoid) and column 1 in
            [-1, 1] (tanh).
        """
        hidden = F.relu(self.linear1(state))
        hidden = F.relu(self.linear2(hidden))
        action0 = torch.sigmoid(self.linear3_1(hidden))
        action1 = torch.tanh(self.linear3_2(hidden))
        return torch.cat([action0, action1], dim=1)

    def select_action(self, state, noiseFlag=False):
        """Return the policy action, optionally perturbed by exploration noise.

        Args:
            state: Torch tensor accepted by ``forward``.
            noiseFlag: When true, one sample from ``self.noise`` is added to
                every row of the action.

        Returns:
            ``(batch, 2)`` tensor. With noise enabled each column is clamped
            back to the range its head produces, so the output range is the
            same with and without noise.
        """
        action = self.forward(state)
        if not noiseFlag:
            return action

        # Create the noise on the action's own device so the addition works
        # wherever the module lives, without relying on a global config.
        noise = torch.tensor(self.noise.get_noise(),
                             dtype=torch.float32,
                             device=action.device).unsqueeze(0)
        # Out-of-place add: sigmoid/tanh keep their outputs for backward, so
        # mutating them in place would invalidate the autograd graph.
        action = action + noise
        head0 = torch.clamp(action[:, :1], 0.0, 1.0)
        head1 = torch.clamp(action[:, 1:], -1.0, 1.0)
        return torch.cat([head0, head1], dim=1)
Пример #2
0
class ActorConvNet(nn.Module):
    """Actor that fuses a 3-D convolutional encoder with a small target MLP.

    ``forward`` expects a dict with a volumetric ``'sensor'`` tensor and a
    6-dimensional ``'target'`` tensor, and emits tanh-squashed actions.
    """

    def __init__(self, inputWidth, num_hidden, num_action):
        """Build the encoder, the fully connected head and the noise process.

        Args:
            inputWidth: Edge length of the cubic sensor volume.
            num_hidden: Width of the hidden fully connected layer.
            num_action: Number of action outputs; also passed to ``OUNoise``.
        """
        super(ActorConvNet, self).__init__()

        self.inputShape = (inputWidth, inputWidth, inputWidth)
        # Input shape per sample: (1, inputWidth, inputWidth, inputWidth).
        self.layer1 = nn.Sequential(
            nn.Conv3d(1,  # input channels
                      32,  # output channels
                      kernel_size=2,
                      stride=1,
                      padding=1),  # k=2, p=1, s=1 grows each spatial dim by 1
            nn.BatchNorm3d(32),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=2, stride=2))  # halves each dim (floor)

        self.layer2 = nn.Sequential(
            nn.Conv3d(32, 64, kernel_size=2, stride=1, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=2, stride=2))  # halves each dim (floor)

        # The target vector is 6-dimensional.
        self.fc0 = nn.Linear(6, 256)
        # The flattened encoder width is measured with a probe pass rather
        # than derived by hand.
        self.fc1 = nn.Linear(self.featureSize() + 256, num_hidden)
        self.fc2 = nn.Linear(num_hidden, num_action)
        self.apply(xavier_init)
        self.noise = OUNoise(num_action, seed=1, mu=0.0, theta=0.15,
                             max_sigma=0.5, min_sigma=0.1,
                             decay_period=1000000)
        self.noise.reset()

    def forward(self, state):
        """Compute actions for a batch of observations.

        Args:
            state: Mapping with ``'sensor'`` (a batch of single-channel
                volumes, ``(batch, 1, W, W, W)``) and ``'target'``
                (``(batch, 6)``).

        Returns:
            ``(batch, num_action)`` tensor with values in [-1, 1].
        """
        sensor = state['sensor']
        target = state['target']
        features = self.layer2(self.layer1(sensor))
        features = features.reshape(features.size(0), -1)
        target_features = F.relu(self.fc0(target))
        fused = torch.cat((features, target_features), 1)
        fused = F.relu(self.fc1(fused))
        return torch.tanh(self.fc2(fused))

    def featureSize(self):
        """Return the flattened width of the encoder output for one sample.

        The probe runs in eval mode and without autograd so that measuring
        the size neither updates the BatchNorm running statistics nor builds
        a graph. The previous train/eval mode is restored afterwards.
        """
        was_training = self.training
        probe_device = next(self.layer1.parameters()).device
        self.eval()
        try:
            with torch.no_grad():
                probe = torch.zeros(1, 1, *self.inputShape,
                                    device=probe_device)
                encoded = self.layer2(self.layer1(probe))
        finally:
            self.train(was_training)
        return encoded.view(1, -1).size(1)

    def select_action(self, state, noiseFlag=False):
        """Return the policy action, optionally with exploration noise.

        Args:
            state: Mapping accepted by ``forward``.
            noiseFlag: When true, one sample from ``self.noise`` is added to
                every row and the result is clamped to [-1, 1].

        Returns:
            ``(batch, num_action)`` action tensor.
        """
        action = self.forward(state)
        if not noiseFlag:
            return action

        # Match the action's device instead of reading a global config.
        noise = torch.tensor(self.noise.get_noise(),
                             dtype=torch.float32,
                             device=action.device).unsqueeze(0)
        # Out-of-place add keeps the tanh output intact for autograd.
        return torch.clamp(action + noise, -1, 1)
Пример #3
0
class DDPGAgent:
    """Bundle of actor/critic networks, their target copies and optimizers."""

    def __init__(self,
                 in_actor,
                 out_actor,
                 in_critic,
                 lr_actor=1.0e-2,
                 lr_critic=1.0e-2):
        """Create the networks, sync the targets and build the optimizers.

        Args:
            in_actor: First argument passed to ``ActorNormLayers``.
            out_actor: Second argument passed to ``ActorNormLayers``; also
                the first argument of ``OUNoise``.
            in_critic: Argument passed to ``Critic``.
            lr_actor: Learning rate of the actor's Adam optimizer.
            lr_critic: Learning rate of the critic's Adam optimizer.
        """
        super(DDPGAgent, self).__init__()

        self.actor = ActorNormLayers(in_actor, out_actor).to(device)
        self.target_actor = ActorNormLayers(in_actor, out_actor).to(device)

        self.critic = Critic(in_critic).to(device)
        self.target_critic = Critic(in_critic).to(device)

        self.noise = OUNoise(out_actor, scale=1.0)

        # Initialize the targets with the same weights as the online networks.
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

        self.actor_optimizer = Adam(list(self.actor.parameters()), lr=lr_actor)
        self.critic_optimizer = Adam(list(self.critic.parameters()),
                                     lr=lr_critic,
                                     weight_decay=1.e-5)

    def reset(self):
        """Reset the exploration-noise process."""
        self.noise.reset()

    def act(self, obs, noise=None):
        """Compute an action from the online actor without tracking gradients.

        Args:
            obs: Observation tensor; moved to ``device`` before the forward pass.
            noise: Optional scale factor. When not ``None``, a sample of
                ``self.noise`` multiplied by this factor is added to the action.

        Returns:
            The action clamped to [-1, 1].
        """
        obs = obs.to(device)
        was_training = self.actor.training
        self.actor.eval()
        try:
            with torch.no_grad():
                if noise is not None:
                    e = noise * self.noise.noise().to(device)
                else:
                    e = 0.0
                action = self.actor(obs) + e
        finally:
            # Restore the previous mode even if the forward pass raised, so
            # a failure here cannot leave the actor stuck in eval mode.
            self.actor.train(was_training)

        return torch.clamp(action, -1., 1.)

    def target_act(self, obs):
        """Compute an action from the target actor without tracking gradients.

        Args:
            obs: Observation tensor; moved to ``device`` before the forward pass.

        Returns:
            The target actor's action clamped to [-1, 1].
        """
        obs = obs.to(device)
        was_training = self.target_actor.training
        self.target_actor.eval()
        try:
            with torch.no_grad():
                action = self.target_actor(obs)
        finally:
            # Put the target actor back in whatever mode it was in before.
            self.target_actor.train(was_training)

        return torch.clamp(action, -1., 1.)
Пример #4
0
class Actor(nn.Module):
    """Three-layer MLP policy whose outputs are squashed to [0, 1]."""

    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 learning_rate=3e-4):
        """Build the layers and the exploration-noise process.

        Args:
            input_size: Width of the state vector.
            hidden_size: Width of both hidden layers.
            output_size: Number of action outputs; also passed to ``OUNoise``.
            learning_rate: Accepted for interface compatibility; unused here.
        """
        super(Actor, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, output_size)
        self.apply(xavier_init)
        self.noise = OUNoise(output_size,
                             seed=1,
                             mu=0.0,
                             theta=0.5,
                             max_sigma=0.05,
                             min_sigma=0.001,
                             decay_period=10000)
        self.noise.reset()

    def forward(self, state):
        """Map a batch of states to sigmoid-squashed actions.

        Args:
            state: Torch tensor whose last dimension is ``input_size``.

        Returns:
            Tensor of actions with values in [0, 1].
        """
        hidden = F.relu(self.linear1(state))
        hidden = F.relu(self.linear2(hidden))
        return torch.sigmoid(self.linear3(hidden))

    def select_action(self, state, noiseFlag=False):
        """Return the policy action, optionally with exploration noise.

        Args:
            state: Torch tensor accepted by ``forward``.
            noiseFlag: When true, one sample from ``self.noise`` is added to
                every row and the result is clamped to [0, 1].

        Returns:
            Action tensor with values in [0, 1].
        """
        action = self.forward(state)
        if not noiseFlag:
            return action

        # Create the noise on the action's device; a default-device tensor
        # would not add to an action produced on another device.
        noise = torch.tensor(self.noise.get_noise(),
                             dtype=torch.float32,
                             device=action.device).unsqueeze(0)
        # Out-of-place add: sigmoid keeps its output for backward, so an
        # in-place update would invalidate the autograd graph.
        return torch.clamp(action + noise, 0, 1)
Пример #5
0
class Actor(nn.Module):
    """Three-layer MLP policy whose outputs are squashed to [-1, 1]."""

    def __init__(self, input_size, hidden_size, output_size, config):
        """Build the layers and the exploration-noise process.

        Args:
            input_size: Width of the state vector.
            hidden_size: Width of both hidden layers.
            output_size: Number of action outputs; also passed to ``OUNoise``.
            config: Mapping stored on the instance; ``config['device']`` is
                where the exploration noise tensor is created.
        """
        super(Actor, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, output_size)

        self.apply(xavier_init)
        self.noise = OUNoise(output_size,
                             seed=1,
                             mu=0.0,
                             theta=0.1,
                             max_sigma=0.5,
                             min_sigma=0.1,
                             decay_period=100000)
        self.noise.reset()

        self.config = config
        self.stepCount = 0

    def forward(self, state):
        """Map a batch of states to tanh-squashed actions.

        Args:
            state: Torch tensor whose last dimension is ``input_size``.

        Returns:
            Tensor of actions with values in [-1, 1].
        """
        hidden = F.relu(self.linear1(state))
        hidden = F.relu(self.linear2(hidden))
        return torch.tanh(self.linear3(hidden))

    def select_action(self, state, noiseFlag=False):
        """Return the policy action, optionally with exploration noise.

        Args:
            state: Torch tensor accepted by ``forward``.
            noiseFlag: When true, one sample from ``self.noise`` is added to
                every row and the result is clamped to [-1, 1].

        Returns:
            Action tensor with values in [-1, 1].
        """
        action = self.forward(state)
        if not noiseFlag:
            return action

        # Use the instance's own config rather than a module-level global
        # that may not exist.
        noise = torch.tensor(self.noise.get_noise(),
                             dtype=torch.float32,
                             device=self.config['device']).unsqueeze(0)
        # Out-of-place add keeps the tanh output intact for autograd.
        return torch.clamp(action + noise, -1, 1)
class Policy(nn.Module):
    """Network producing an action mean, a state value and a quadratic Q-value.

    Q is assembled as ``V(s) + A(s, u)`` with
    ``A = -0.5 * (u - mu)^T P (u - mu)`` and ``P = L L^T``, where ``L`` is
    lower triangular with an exponentiated diagonal (the construction used
    by Normalized Advantage Functions).
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Build the shared trunk, the three heads and the noise process.

        Args:
            input_size: Width of the state vector.
            hidden_size: Width of both hidden layers.
            output_size: Action dimension; also passed to ``OUNoise``.
        """
        super(Policy, self).__init__()
        self.output_size = output_size

        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)

        self.V = nn.Linear(hidden_size, 1)

        self.mu = nn.Linear(hidden_size, output_size)

        self.L = nn.Linear(hidden_size, output_size**2)

        # Register the masks as buffers so they follow ``.to(device)`` with
        # the rest of the module. ``persistent=False`` keeps them out of the
        # state_dict, so the checkpoint layout is unchanged.
        self.register_buffer(
            'tril_mask',
            torch.tril(torch.ones(output_size, output_size),
                       diagonal=-1).unsqueeze(0),
            persistent=False)
        self.register_buffer(
            'diag_mask',
            torch.eye(output_size).unsqueeze(0),
            persistent=False)

        self.noise = OUNoise(output_size,
                             seed=1,
                             mu=0.0,
                             theta=0.15,
                             max_sigma=0.3,
                             min_sigma=0.05,
                             decay_period=10000)
        self.noise.reset()
        self.apply(xavier_init)

    def forward(self, state, action):
        """Compute the action mean, the Q-value and the state value.

        Args:
            state: ``(batch, input_size)`` tensor.
            action: ``(batch, output_size)`` tensor, or ``None`` to skip the
                Q-value computation.

        Returns:
            Tuple ``(mu, Q, V)``. ``mu`` lies in [0, 1] (sigmoid); ``Q`` is
            ``None`` when ``action`` is ``None``, otherwise ``(batch, 1)``;
            ``V`` is ``(batch, 1)``.
        """
        hidden = torch.tanh(self.linear1(state))
        hidden = torch.tanh(self.linear2(hidden))

        V = self.V(hidden)
        mu = torch.sigmoid(self.mu(hidden))

        if action is None:
            return mu, None, V

        num_outputs = mu.size(1)
        L = self.L(hidden).view(-1, num_outputs, num_outputs)
        # Keep the strictly-lower triangle as is and exponentiate the
        # diagonal so it is strictly positive; the upper triangle is zeroed.
        L = (L * self.tril_mask.expand_as(L)
             + torch.exp(L) * self.diag_mask.expand_as(L))
        P = torch.bmm(L, L.transpose(2, 1))

        u_mu = (action - mu).unsqueeze(2)
        A = -0.5 * torch.bmm(torch.bmm(u_mu.transpose(2, 1), P),
                             u_mu)[:, :, 0]

        return mu, A + V, V

    def select_action(self, state, noiseFlag=True):
        """Return the action mean, optionally with exploration noise.

        Args:
            state: Tensor accepted by ``forward``.
            noiseFlag: When true (the default), one sample from
                ``self.noise`` is added to every row and the result is
                clamped to [0, 1].

        Returns:
            ``(batch, output_size)`` action tensor.
        """
        action, _, _ = self.forward(state, None)
        if not noiseFlag:
            return action

        noise = torch.tensor(self.noise.get_noise(),
                             dtype=torch.float32,
                             device=action.device).unsqueeze(0)
        # Out-of-place add: sigmoid keeps its output for backward, so an
        # in-place update would invalidate the autograd graph.
        return torch.clamp(action + noise, 0, 1)

    def eval_Q_value(self, state, action):
        """Return only the Q-value for the given state/action batch."""
        _, Q, _ = self.forward(state, action)
        return Q

    def eval_state_value(self, state):
        """Return only the state value for the given state batch."""
        _, _, V = self.forward(state, None)
        return V
Пример #7
0
class Actor(nn.Module):
    """MLP policy with a sigmoid head and a tanh head, plus scripted actions.

    ``forward`` emits a two-column action: column 0 in [0, 1] (sigmoid) and
    column 1 in [-1, 1] (tanh).
    """

    def __init__(self, input_size, hidden_size, output_size, config):
        """Build the layers and the exploration-noise process.

        Args:
            input_size: Width of the state vector.
            hidden_size: Width of both hidden layers.
            output_size: First argument handed to ``OUNoise``. The network
                itself always emits two columns.
            config: Mapping stored on the instance; the visible code reads
                ``config['device']`` and ``config['particleType']``.
        """
        super(Actor, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3_1 = nn.Linear(hidden_size, 1)
        self.linear3_2 = nn.Linear(hidden_size, 1)
        self.apply(xavier_init)
        self.noise = OUNoise(output_size,
                             seed=1,
                             mu=0.0,
                             theta=0.15,
                             max_sigma=0.3,
                             min_sigma=0.05,
                             decay_period=10000)
        self.noise.reset()

        self.config = config
        self.stepCount = 0

    def forward(self, state):
        """Map a batch of states to a ``(batch, 2)`` action tensor.

        Args:
            state: Torch tensor whose last dimension is ``input_size``.

        Returns:
            Tensor with column 0 in [0, 1] and column 1 in [-1, 1].
        """
        hidden = F.relu(self.linear1(state))
        hidden = F.relu(self.linear2(hidden))
        action0 = torch.sigmoid(self.linear3_1(hidden))
        action1 = torch.tanh(self.linear3_2(hidden))
        return torch.cat([action0, action1], dim=1)

    def getCustomAction(self):
        """Return a scripted action chosen by ``config['particleType']``.

        ``'FULLCONTROL'`` and ``'SLIDER'`` draw one of three fixed actions
        uniformly at random with ``np.random.randint``; ``'VANILLASP'`` and
        ``'CIRCLER'`` always return ``[1]``.

        Returns:
            Float32 tensor of shape ``(1, k)`` on ``config['device']``.

        Raises:
            ValueError: If ``config['particleType']`` is not one of the
                four supported values.
        """
        particle_type = self.config['particleType']
        if particle_type == 'FULLCONTROL':
            # The first component is always 1; the second is 0, -1 or 1.
            second = (0, -1, 1)[np.random.randint(0, 3)]
            action = np.array([1, second])
        elif particle_type in ('VANILLASP', 'CIRCLER'):
            action = np.array([1])
        elif particle_type == 'SLIDER':
            action = np.array([(1, 0, -1)[np.random.randint(0, 3)]])
        else:
            # Fail with a clear message instead of hitting an unbound local.
            raise ValueError(
                'Unknown particleType: {!r}'.format(particle_type))

        return torch.tensor(action,
                            dtype=torch.float32,
                            device=self.config['device']).unsqueeze(0)

    # NOTE: getCustomAction and stepCount are not used by select_action. An
    # earlier, disabled draft gated a custom-exploration phase on
    # config['customExploreFlag'] / config['customExploreSteps'].

    def select_action(self, state, noiseFlag=False):
        """Return the policy action, optionally with exploration noise.

        Args:
            state: Torch tensor accepted by ``forward``.
            noiseFlag: When true, one sample from ``self.noise`` is added to
                every row; the result is clamped to [-1, 1] and column 0 is
                additionally floored at 0 to stay in the sigmoid head's range.

        Returns:
            ``(batch, 2)`` action tensor.
        """
        action = self.forward(state)
        if not noiseFlag:
            return action

        # Use the instance's own config rather than a module-level global
        # that may not exist.
        noise = torch.tensor(self.noise.get_noise(),
                             dtype=torch.float32,
                             device=self.config['device']).unsqueeze(0)
        # Out-of-place ops keep the sigmoid/tanh outputs intact for autograd.
        action = torch.clamp(action + noise, -1, 1)
        # Floor column 0 for every row of the batch, not only the first one.
        head0 = torch.clamp(action[:, :1], min=0.0)
        return torch.cat([head0, action[:, 1:]], dim=1)