def __init__(self, num_inputs, recurrent=False, hidden_size=64):
    """Build the actor trunk, the critic trunk and the scalar value head.

    Both trunks are eight Linear + Tanh stages with widths
    ``in -> 3h -> 3h -> 3h -> 3h -> 3h -> 2h -> h -> h`` where ``h`` is
    ``hidden_size`` and ``in`` is ``num_inputs`` (or ``hidden_size`` when
    ``recurrent`` is truthy).

    Args:
        num_inputs: Input feature width; also forwarded to the base
            constructor.
        recurrent: Forwarded to the base constructor. When truthy, the
            trunks take ``hidden_size`` inputs instead of ``num_inputs``
            (presumably because a recurrent layer in the base class sits
            in front of them -- confirm against the base class).
        hidden_size: Base width ``h`` used for every layer size.
    """
    super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

    trunk_in = hidden_size if recurrent else num_inputs

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    def ortho_init(module):
        # `init` is a project helper; its result is used as the module, so
        # it presumably initialises `module` in place and returns it.
        return init(module, nn.init.orthogonal_, zero_bias, np.sqrt(2))

    widths = (
        [trunk_in]
        + [hidden_size * 3] * 5
        + [hidden_size * 2, hidden_size, hidden_size]
    )

    def build_trunk():
        # Layers are created strictly in order so that random-number
        # consumption during initialisation matches a hand-written stack.
        stages = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stages.append(ortho_init(nn.Linear(fan_in, fan_out)))
            stages.append(nn.Tanh())
        return nn.Sequential(*stages)

    self.actor = build_trunk()
    self.critic = build_trunk()

    # Maps the critic trunk's hidden_size-wide output to a single value.
    self.critic_linear = ortho_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, num_inputs, num_outputs):
    """Create the single linear layer used by this head.

    Args:
        num_inputs: Width of the incoming feature vector.
        num_outputs: Width of the layer's output (presumably one value per
            Bernoulli variable, going by the class name -- confirm in the
            forward pass).
    """
    super(Bernoulli, self).__init__()

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    def ortho_init(module):
        # `init` is a project helper; no gain argument is passed here, so
        # whatever default it defines applies.
        return init(module, nn.init.orthogonal_, zero_bias)

    self.linear = ortho_init(nn.Linear(num_inputs, num_outputs))
def __init__(self, num_inputs, num_outputs):
    """Create the mean layer and the log-std bias module.

    Args:
        num_inputs: Width of the incoming feature vector.
        num_outputs: Width of the mean layer's output and length of the
            zero vector handed to ``AddBias``.
    """
    super(DiagGaussian, self).__init__()

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    def ortho_init(module):
        # `init` is a project helper; no gain argument is passed here, so
        # whatever default it defines applies.
        return init(module, nn.init.orthogonal_, zero_bias)

    self.fc_mean = ortho_init(nn.Linear(num_inputs, num_outputs))

    # AddBias is a project module; it is seeded with zeros of length
    # num_outputs (presumably a learnable log-std that starts at 0 --
    # confirm in AddBias).
    initial_logstd = torch.zeros(num_outputs)
    self.logstd = AddBias(initial_logstd)
def __init__(self, num_inputs, recurrent=False, hidden_size=512):
    """Build the convolutional feature extractor and the value head.

    ``self.main`` is three Conv2d + ReLU stages, a ``Flatten`` and one
    Linear + ReLU stage that outputs ``hidden_size`` features.

    Args:
        num_inputs: Number of input channels of the first convolution.
        recurrent: Forwarded to the base constructor.
        hidden_size: Output width of ``self.main``; also passed to the
            base constructor as both of its size arguments.
    """
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    def relu_init(module):
        # Project helper `init`, called with the gain PyTorch recommends
        # for ReLU.
        return init(module, nn.init.orthogonal_, zero_bias,
                    nn.init.calculate_gain('relu'))

    def plain_init(module):
        # Same helper without an explicit gain (its default applies).
        return init(module, nn.init.orthogonal_, zero_bias)

    # (in_channels, out_channels, kernel_size, stride) for each conv stage.
    conv_specs = (
        (num_inputs, 32, 8, 4),
        (32, 64, 4, 2),
        (64, 32, 3, 1),
    )

    layers = []
    for in_ch, out_ch, kernel, stride in conv_specs:
        layers.append(relu_init(nn.Conv2d(in_ch, out_ch, kernel, stride=stride)))
        layers.append(nn.ReLU())

    # 32 * 7 * 7 hard-codes the flattened conv output size; with these
    # kernels and strides that corresponds to 84x84 inputs -- confirm with
    # the callers' observation shape.
    layers.append(Flatten())
    layers.append(relu_init(nn.Linear(32 * 7 * 7, hidden_size)))
    layers.append(nn.ReLU())
    self.main = nn.Sequential(*layers)

    # The value head is initialised without the ReLU gain.
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, num_inputs, num_outputs):
    """Create the single linear layer used by this head.

    Args:
        num_inputs: Width of the incoming feature vector.
        num_outputs: Width of the layer's output (presumably one logit per
            category, going by the class name -- confirm in the forward
            pass).
    """
    super(Categorical, self).__init__()

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    def small_gain_init(module):
        # Project helper `init`, called with an explicit gain of 0.01.
        return init(module, nn.init.orthogonal_, zero_bias, gain=0.01)

    self.linear = small_gain_init(nn.Linear(num_inputs, num_outputs))
def __init__(self, num_inputs, recurrent=False, hidden_size=256):
    """Build the shared conv trunk and the mask, actor and critic heads.

    ``self.share`` is five 3x3 Conv2d + ReLU stages with 64 output channels.
    Each head starts with a 1x1 Conv2d + ReLU, a ``Flatten`` and a
    Linear + ReLU to ``hidden_size``; the mask head adds one more
    Linear + ReLU to ``pred_len`` outputs.

    Args:
        num_inputs: Forwarded to the base constructor only.
            NOTE(review): the first convolution takes ``config.channel``
            input channels rather than ``num_inputs`` -- confirm intended.
        recurrent: Forwarded to the base constructor.
        hidden_size: Width of each head's hidden Linear layer.
    """
    super(CNNPro, self).__init__(recurrent, num_inputs, hidden_size)

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    def relu_init(module):
        # Project helper `init`, called with the gain PyTorch recommends
        # for ReLU. Every layer in this constructor, including
        # critic_linear, uses it.
        return init(module, nn.init.orthogonal_, zero_bias,
                    nn.init.calculate_gain('relu'))

    def conv3x3(in_channels):
        return relu_init(nn.Conv2d(in_channels, 64, 3, stride=1, padding=1))

    def head_layers(channels):
        # 1x1 conv down to `channels` feature maps, then flatten and project
        # to hidden_size. The Linear width assumes the feature map is
        # config.pallet_size x config.pallet_size -- TODO confirm.
        return [
            relu_init(nn.Conv2d(64, channels, 1, stride=1)),
            nn.ReLU(),
            Flatten(),
            relu_init(nn.Linear(
                channels * config.pallet_size * config.pallet_size,
                hidden_size)),
            nn.ReLU(),
        ]

    trunk = [conv3x3(config.channel), nn.ReLU()]
    for _ in range(4):
        trunk.append(conv3x3(64))
        trunk.append(nn.ReLU())
    self.share = nn.Sequential(*trunk)

    # Mask output length: product of the first two container dimensions,
    # doubled when rotation is enabled.
    # NOTE(review): this uses config.container_size while the Linear input
    # widths use config.pallet_size -- confirm the two agree.
    cell_count = config.container_size[0] * config.container_size[1]
    pred_len = cell_count * 2 if config.enable_rotation else cell_count

    mask_layers = head_layers(8)
    mask_layers.append(relu_init(nn.Linear(hidden_size, pred_len)))
    mask_layers.append(nn.ReLU())
    # NOTE: a trailing nn.Sigmoid() is left disabled after the final ReLU.
    self.mask = nn.Sequential(*mask_layers)

    self.actor = nn.Sequential(*head_layers(8))
    self.critic = nn.Sequential(*head_layers(4))

    # Maps the critic head's hidden_size-wide output to a single value.
    self.critic_linear = relu_init(nn.Linear(hidden_size, 1))

    self.train()