def __init__(self, num_inputs, action_space, normalize=False, name=None):
    """Create the layers of the actor-critic network.

    Args:
        num_inputs: Number of input channels given to the first convolution.
        action_space: Object whose ``n`` attribute sets the number of
            outputs of ``actor_linear``.
        normalize: When truthy, ``self.ob_rms`` is set to a
            ``RunningMeanStd(shape=(84, 84))``; otherwise it is ``None``.
        name: Value stored in ``self._name``.
    """
    super(ActorCritic, self).__init__()
    self._name = name

    # Convolutional layers, created in the order conv1 -> conv2 -> conv3.
    self.conv1 = nn.Conv2d(num_inputs, 32, kernel_size=8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, kernel_size=3, stride=1)

    # Input width of linear1, obtained from _get_conv_out for a
    # (num_inputs, 84, 84) shape.
    flat_features = self._get_conv_out((num_inputs, 84, 84))

    self.linear1 = nn.Linear(flat_features, 512)
    self.critic_linear = nn.Linear(512, 1)
    self.actor_linear = nn.Linear(512, action_space.n)

    # Initialise every sub-module, then scale the weights of the three
    # convolutions and linear1 by the 'relu' gain.
    self.apply(weights_init)
    gain = nn.init.calculate_gain('relu')
    for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
        layer.weight.data.mul_(gain)

    if normalize:
        self.ob_rms = RunningMeanStd(shape=(84, 84))
    else:
        self.ob_rms = None
def __init__(self, args, action_space):
    """Create the layers of the DQN network.

    Args:
        args: Configuration object. The attributes read here are
            ``categorical``, ``dueling``, ``atoms`` (only when
            ``categorical`` is truthy), ``noisy_linear``,
            ``history_length``, ``hidden_size`` and ``normalize``.
        action_space: Number of actions; multiplied by ``self.atoms`` to
            size the output of ``fc_a``.
    """
    super(DQN, self).__init__()
    self.categorical = args.categorical
    self.dueling = args.dueling
    if args.categorical:
        self.atoms = args.atoms
    else:
        self.atoms = 1
    self.action_space = action_space

    # Layer class shared by both fully connected heads.
    if args.noisy_linear:
        linear_cls = NoisyLinear
    else:
        linear_cls = nn.Linear

    in_channels = args.history_length
    self.conv = nn.Sequential(
        nn.Conv2d(in_channels, 32, kernel_size=8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, kernel_size=4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=1),
        nn.ReLU(),
    )
    # Input width of the heads, obtained from _get_conv_out for a
    # (history_length, 84, 84) shape.
    flat_features = self._get_conv_out((in_channels, 84, 84))

    def build_head(out_features):
        # TODO: Add std_init argument to noisy linear constructors
        return nn.Sequential(
            linear_cls(in_features=flat_features,
                       out_features=args.hidden_size),
            nn.ReLU(),
            linear_cls(in_features=args.hidden_size,
                       out_features=out_features),
        )

    self.fc_a = build_head(action_space * self.atoms)
    if args.dueling:
        # Second head; only exists in the dueling configuration.
        self.fc_v = build_head(self.atoms)

    self.apply(weights_init)

    if args.normalize:
        self.ob_rms = RunningMeanStd(shape=(84, 84))
    else:
        self.ob_rms = None
class ActorCritic(nn.Module):
    """Convolutional network with a shared trunk and two linear heads.

    The trunk is three convolutions followed by ``linear1`` (512 units),
    each passed through ReLU in ``forward``. ``critic_linear`` maps the
    trunk output to one value and ``actor_linear`` maps it to
    ``action_space.n`` values. The layer sizes are derived for inputs of
    shape ``(num_inputs, 84, 84)``.

    Args:
        num_inputs: Number of input channels given to the first convolution.
        action_space: Object whose ``n`` attribute sets the number of
            outputs of ``actor_linear``.
        normalize: When truthy, a ``RunningMeanStd(shape=(84, 84))`` is
            created and used by ``forward`` to normalise inputs.
        name: Base name for checkpoints; ``save`` writes ``'<name>.pth'``.
    """

    def __init__(self, num_inputs, action_space, normalize=False, name=None):
        super(ActorCritic, self).__init__()
        self._name = name

        self.conv1 = nn.Conv2d(in_channels=num_inputs, out_channels=32,
                               kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64,
                               kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=32,
                               kernel_size=3, stride=1)

        conv_out_size = self._get_conv_out((num_inputs, 84, 84))

        self.linear1 = nn.Linear(in_features=conv_out_size, out_features=512)
        self.critic_linear = nn.Linear(in_features=512, out_features=1)
        self.actor_linear = nn.Linear(in_features=512,
                                      out_features=action_space.n)

        # Initialise all sub-modules, then rescale the layers that are
        # followed by a ReLU in forward() by the ReLU gain.
        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain('relu')
        for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
            layer.weight.data.mul_(relu_gain)

        self.ob_rms = RunningMeanStd(shape=(84, 84)) if normalize else None

    def _get_conv_out(self, shape):
        """Return the number of elements the conv stack outputs.

        Args:
            shape: Input shape without the batch dimension.

        Returns:
            int: Element count of the conv3 output for a single all-zero
            input of the given shape.
        """
        # Only the output shape is needed, so skip autograd bookkeeping.
        with torch.no_grad():
            o = self.conv1(torch.zeros(1, *shape))
            o = self.conv2(o)
            o = self.conv3(o)
        return int(np.prod(o.size()))

    def forward(self, x):
        """Run the network on a batch.

        When ``ob_rms`` is set, the input is first normalised with its
        running mean and variance (and the statistics are updated while the
        module is in training mode). No gradients are recorded for that
        step.

        Args:
            x: Input tensor whose first dimension is the batch.

        Returns:
            tuple: ``(critic_linear(h), actor_linear(h))`` where ``h`` is the
            output of the shared trunk.
        """
        with torch.no_grad():
            if self.ob_rms is not None:
                if self.training:
                    self.ob_rms.update(x)
                mean = self.ob_rms.mean.to(dtype=torch.float32,
                                           device=x.device)
                # Epsilon keeps the division finite when the variance is 0.
                std = torch.sqrt(
                    self.ob_rms.var.to(dtype=torch.float32, device=x.device)
                    + float(np.finfo(np.float32).eps))
                x = (x - mean) / std

        # Match the parameter dtype so the convolution accepts the input.
        x = x.to(dtype=self.conv1.weight.dtype)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = x.view(x.size(0), -1)
        x = F.relu(self.linear1(x))
        return self.critic_linear(x), self.actor_linear(x)

    def name(self):
        """Return the name given at construction (may be ``None``)."""
        return self._name

    def save(self):
        """Write the state dict to ``'<name>.pth'``.

        Does nothing when the model has no (truthy) name.
        """
        if self.name():
            name = '{}.pth'.format(self.name())
            torch.save(self.state_dict(), name)

    def load(self, name=None):
        """Load a state dict from disk into this module.

        Args:
            name: Path of the file to load. When omitted, the file written
                by ``save`` (``'<name>.pth'``) is used; if that file does
                not exist but a file called exactly ``<name>`` does, the
                latter is loaded instead.

        Raises:
            ValueError: If ``name`` is omitted and the model has no name.
        """
        import os  # local import: the file's import block is not visible here

        if name:
            path = name
        else:
            base = self.name()
            if not base:
                raise ValueError(
                    'load() needs a file name when the model has no name')
            # save() appends '.pth', so look there first.
            path = '{}.pth'.format(base)
            if not os.path.exists(path) and os.path.exists(base):
                path = base
        self.load_state_dict(torch.load(path))
class DQN(nn.Module):
    """Convolutional Q-network with optional dueling and categorical output.

    A three-layer convolutional stack feeds the ``fc_a`` head and, when
    ``args.dueling`` is truthy, a second ``fc_v`` head. ``forward`` returns
    a tensor viewed as ``(-1, action_space, atoms)``; in the categorical
    configuration a (log-)softmax is applied over the last dimension.

    Args:
        args: Configuration object. The attributes read are ``categorical``,
            ``dueling``, ``atoms`` (only when ``categorical`` is truthy),
            ``noisy_linear``, ``history_length``, ``hidden_size`` and
            ``normalize``.
        action_space: Number of actions.
    """

    def __init__(self, args, action_space):
        super(DQN, self).__init__()
        self.categorical = args.categorical
        self.dueling = args.dueling
        if args.categorical:
            self.atoms = args.atoms
        else:
            self.atoms = 1
        self.action_space = action_space

        # Layer class shared by both fully connected heads.
        if args.noisy_linear:
            linear_cls = NoisyLinear
        else:
            linear_cls = nn.Linear

        in_channels = args.history_length
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )
        flat_features = self._get_conv_out((in_channels, 84, 84))

        def build_head(out_features):
            # TODO: Add std_init argument to noisy linear constructors
            return nn.Sequential(
                linear_cls(in_features=flat_features,
                           out_features=args.hidden_size),
                nn.ReLU(),
                linear_cls(in_features=args.hidden_size,
                           out_features=out_features),
            )

        self.fc_a = build_head(action_space * self.atoms)
        if args.dueling:
            self.fc_v = build_head(self.atoms)

        self.apply(weights_init)

        if args.normalize:
            self.ob_rms = RunningMeanStd(shape=(84, 84))
        else:
            self.ob_rms = None

    def _get_conv_out(self, shape):
        """Return the element count of the conv output for one zero input.

        Args:
            shape: Input shape without the batch dimension.
        """
        probe = torch.zeros(1, *shape)
        return int(np.prod(self.conv(probe).size()))

    def _normalize(self, x):
        """Normalise ``x`` with the running statistics held in ``ob_rms``.

        The statistics are updated first when the module is in training
        mode. The caller is responsible for disabling gradients.
        """
        if self.training:
            self.ob_rms.update(x)
        mean = self.ob_rms.mean.to(dtype=torch.float32, device=x.device)
        variance = self.ob_rms.var.to(dtype=torch.float32, device=x.device)
        std = torch.sqrt(variance + float(np.finfo(np.float32).eps))
        return (x - mean) / std

    def forward(self, x, log=False):
        """Run the network on a batch.

        Args:
            x: Input tensor whose first dimension is the batch.
            log: In the categorical configuration, return log-probabilities
                instead of probabilities. Ignored otherwise.

        Returns:
            Tensor viewed as ``(-1, action_space, atoms)``. In the
            categorical configuration it is (log-)softmax-normalised over
            the last dimension.
        """
        with torch.no_grad():
            if self.ob_rms:
                x = self._normalize(x)

        features = self.conv(x).view(x.size(0), -1)
        a = self.fc_a(features).view(-1, self.action_space, self.atoms)

        if self.dueling:
            v = self.fc_v(features).view(-1, 1, self.atoms)
            # Combine streams
            q = v + a - a.mean(1, keepdim=True)
        else:
            q = a

        if not self.categorical:
            return q
        if log:
            # Use log softmax for numerical stability:
            # log probabilities with action over second dimension
            return F.log_softmax(q, dim=2)
        # Probabilities with action over second dimension
        return F.softmax(q, dim=2)

    def reset_noise(self):
        """Call ``reset_noise`` on every ``NoisyLinear`` layer in the heads."""
        heads = [self.fc_a]
        if self.dueling:
            heads.append(self.fc_v)
        for head in heads:
            for module in head.modules():
                if isinstance(module, NoisyLinear):
                    module.reset_noise()