Пример #1
0
    def __init__(self, observation_space, action_space, n_frames, args):
        """Assemble a Conv2d -> LSTMCell network with actor and critic heads.

        Args:
            observation_space: Kept on the instance unchanged; the network
                input shape is taken from the ``senc_NNGrid`` encoder's own
                ``observation_space`` instead.
            action_space: Object exposing ``.shape``; ``shape[0]`` is the
                output width of both actor heads.
            n_frames: Frame count given to ``FrameStack``; it also scales
                the encoder's leading shape entry into conv1's input
                channels.
            args: Forwarded as-is to ``senc_NNGrid``.
        """
        super(ActorCritic, self).__init__()

        # Helpers that preprocess raw states before they reach the network.
        self.senc_nngrid = senc_NNGrid(args)
        self.frame_stack = FrameStack(n_frames)

        self.observation_space = observation_space
        self.action_space = action_space

        grid_shape = self.senc_nngrid.observation_space.shape
        self.input_size = grid_shape
        self.output_size = int(np.prod(self.action_space.shape))

        # Convolutional trunk; frames are folded into the channel axis.
        stacked_channels = self.frame_stack.n_frames * grid_shape[0]
        self.conv1 = nn.Conv2d(
            stacked_channels, 32, kernel_size=4, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=0)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0)

        # Push an all-zero batch of one through the trunk so the LSTM input
        # width can be read off the result instead of being hard-coded.
        probe = Variable(torch.zeros((1, n_frames) + grid_shape))
        probe = probe.view((1, n_frames * grid_shape[0]) + grid_shape[1:])
        conv_features = self._convforward(probe)

        lstm_hidden = 128
        self.lstm = nn.LSTMCell(conv_features.nelement(), lstm_hidden)

        n_actions = self.action_space.shape[0]
        self.critic_linear = nn.Linear(lstm_hidden, 1)
        self.actor_linear = nn.Linear(lstm_hidden, n_actions)
        self.actor_linear2 = nn.Linear(lstm_hidden, n_actions)

        # Generic init first, then rescale the conv weights by the
        # leaky-ReLU gain.
        self.apply(weights_init)
        gain = nn.init.calculate_gain('leaky_relu')
        for conv in (self.conv1, self.conv2, self.conv3):
            conv.weight.data.mul_(gain)

        # Heads get column-normalised weights (small scale for the actor
        # heads, unit scale for the critic) and zero biases.
        head_scales = (
            (self.actor_linear, 0.01),
            (self.actor_linear2, 0.01),
            (self.critic_linear, 1.0),
        )
        for head, scale in head_scales:
            head.weight.data = norm_col_init(head.weight.data, scale)
            head.bias.data.fill_(0)

        self.train()
Пример #2
0
    def __init__(self, observation_space, action_space, n_frames, args):
        """Assemble a four-layer ConvLSTM stack with convolutional heads.

        Args:
            observation_space: Kept on the instance unchanged; the network
                input shape is taken from the ``senc_NNGrid`` encoder.
            action_space: Object exposing ``.shape``; also handed to
                ``adec_NNGrid``.
            n_frames: Frame count given to ``FrameStack``; it also scales
                the encoder's leading shape entry into the first layer's
                input channels.
            args: Forwarded as-is to ``senc_NNGrid`` and ``adec_NNGrid``.
        """
        super(ActorCritic, self).__init__()

        # Helpers that preprocess raw states.
        self.senc_nngrid = senc_NNGrid(args)
        self.frame_stack = FrameStack(n_frames)

        # Helper that postprocesses network outputs into actions.
        self.adec_nngrid = adec_NNGrid(action_space, args)

        self.observation_space = observation_space
        self.action_space = action_space

        self.input_size = self.senc_nngrid.observation_space.shape
        self.output_size = int(np.prod(self.action_space.shape))

        # Output channel count of each ConvLSTM layer, in order.
        hidden_channels = [32, 64, 128, 128]
        stacked_channels = self.frame_stack.n_frames * self.input_size[0]
        self.convlstm1 = ConvLSTM(stacked_channels, 32, 4, stride=1, padding=1)
        self.convlstm2 = ConvLSTM(32, 64, 3, stride=1, padding=1)
        self.convlstm3 = ConvLSTM(64, 128, 3, stride=1, padding=1)
        self.convlstm4 = ConvLSTM(128, 128, 3, stride=1, padding=1)
        self.convlstm = [
            self.convlstm1, self.convlstm2, self.convlstm3, self.convlstm4
        ]

        # Walk the stack once, recording the (channels, *spatial) shape each
        # layer produces; each layer's output shape feeds the next layer.
        self.memsizes = []
        layer_shape = (n_frames * self.input_size[0], ) + self.input_size[1:]
        for layer, channels in zip(self.convlstm, hidden_channels):
            spatial = layer._spatial_size_output_given_input(
                (1, ) + layer_shape)
            layer_shape = (channels, ) + spatial
            self.memsizes.append(copy.deepcopy(layer_shape))

        self.critic_linear = nn.Conv2d(
            128, 1, kernel_size=3, stride=1, padding=1)
        self.actor_linear = nn.Conv2d(
            128, 1, kernel_size=3, stride=1, padding=1)
        self.actor_linear2 = nn.Conv2d(
            128, 1, kernel_size=3, stride=1, padding=1)

        # Heads get column-normalised weights (small scale for the actor
        # heads, unit scale for the critic) and zero biases.
        head_scales = (
            (self.actor_linear, 0.01),
            (self.actor_linear2, 0.01),
            (self.critic_linear, 1.0),
        )
        for head, scale in head_scales:
            head.weight.data = norm_col_init(head.weight.data, scale)
            head.bias.data.fill_(0)

        self.train()
    def __init__(self, observation_space, action_space, n_frames, args):
        """Assemble a Conv1d -> LSTMCell network with actor and critic heads.

        Args:
            observation_space: Object exposing ``.shape``; the product of
                its entries is the per-frame input length.
            action_space: Object exposing ``.shape``; the product of its
                entries is the output width of both actor heads.
            n_frames: Frame count given to ``FrameStack``; the stacked
                frames form the Conv1d input channels.
            args: Not read by this constructor.
        """
        super(ActorCritic, self).__init__()

        # Frame-stacking helper.
        self.frame_stack = FrameStack(n_frames)

        self.observation_space = observation_space
        self.action_space = action_space

        self.input_size = int(np.prod(self.observation_space.shape))
        self.output_size = int(np.prod(self.action_space.shape))

        # Convolutional trunk: each Conv1d is paired with its own LeakyReLU.
        stacked_frames = self.frame_stack.n_frames
        self.conv1 = nn.Conv1d(
            stacked_frames, 32, kernel_size=3, stride=1, padding=1)
        self.lrelu1 = nn.LeakyReLU(0.1)
        self.conv2 = nn.Conv1d(32, 32, kernel_size=3, stride=1, padding=1)
        self.lrelu2 = nn.LeakyReLU(0.1)
        self.conv3 = nn.Conv1d(32, 64, kernel_size=2, stride=1, padding=1)
        self.lrelu3 = nn.LeakyReLU(0.1)
        self.conv4 = nn.Conv1d(64, 64, kernel_size=1, stride=1)
        self.lrelu4 = nn.LeakyReLU(0.1)

        # Push an all-zero batch of one through the trunk so the LSTM input
        # width can be read off the result instead of being hard-coded.
        probe = Variable(
            torch.zeros(1, self.frame_stack.n_frames, self.input_size))
        conv_features = self._convforward(probe)

        lstm_hidden = 128
        self.lstm = nn.LSTMCell(conv_features.nelement(), lstm_hidden)
        self.critic_linear = nn.Linear(lstm_hidden, 1)
        self.actor_linear = nn.Linear(lstm_hidden, self.output_size)
        self.actor_linear2 = nn.Linear(lstm_hidden, self.output_size)

        # Generic init first, then rescale the conv weights by the
        # leaky-ReLU gain.
        # NOTE(review): calculate_gain is called without a slope argument
        # while the activations above use 0.1 -- confirm this is intended.
        self.apply(weights_init)
        gain = nn.init.calculate_gain('leaky_relu')
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            conv.weight.data.mul_(gain)

        # Heads get column-normalised weights (small scale for the actor
        # heads, unit scale for the critic) and zero biases.
        head_scales = (
            (self.actor_linear, 0.01),
            (self.actor_linear2, 0.01),
            (self.critic_linear, 1.0),
        )
        for head, scale in head_scales:
            head.weight.data = norm_col_init(head.weight.data, scale)
            head.bias.data.fill_(0)

        # Both LSTM bias vectors start at zero.
        for lstm_bias in (self.lstm.bias_ih, self.lstm.bias_hh):
            lstm_bias.data.fill_(0)

        self.train()
Пример #4
0
    def __init__(self, observation_space, action_space, n_frames, args):
        """Assemble an MLP -> LSTMCell network with actor and critic heads.

        Args:
            observation_space: Object exposing ``.shape``; ``shape[0]`` is
                written into ``args``. Only 1-D observation spaces are
                supported (per the original author's note).
            action_space: Object exposing ``.shape``; the product of its
                entries is the output width of both actor heads.
            n_frames: Frame count given to ``FrameStack``; the LSTM input
                width is ``n_frames * 128``.
            args: Mapping forwarded to ``senc_FlatDepthNNGrid``. Its
                ``'observation_dim'`` entry is overwritten in place, so the
                caller's object is modified.
        """
        super(ActorCritic, self).__init__()

        # State preprocessing (1-D observation spaces only).
        args['observation_dim'] = observation_space.shape[0]
        self.senc_nngrid = senc_FlatDepthNNGrid(args)
        self.frame_stack = FrameStack(n_frames)

        self.observation_space = observation_space
        self.action_space = action_space

        self.input_size = self.senc_nngrid.observation_space.shape[0]
        self.output_size = int(np.prod(self.action_space.shape))

        # Fully connected trunk: each Linear is paired with its own
        # LeakyReLU.
        self.fc1 = nn.Linear(self.input_size, 256)
        self.lrelu1 = nn.LeakyReLU(0.1)
        self.fc2 = nn.Linear(256, 256)
        self.lrelu2 = nn.LeakyReLU(0.1)
        self.fc3 = nn.Linear(256, 128)
        self.lrelu3 = nn.LeakyReLU(0.1)
        self.fc4 = nn.Linear(128, 128)
        self.lrelu4 = nn.LeakyReLU(0.1)

        # LSTM input width: one 128-wide feature vector per stacked frame.
        lstm_hidden = 128
        self.m1 = self.frame_stack.n_frames * 128
        self.lstm = nn.LSTMCell(self.m1, lstm_hidden)
        self.critic_linear = nn.Linear(lstm_hidden, 1)
        self.actor_linear = nn.Linear(lstm_hidden, self.output_size)
        self.actor_linear2 = nn.Linear(lstm_hidden, self.output_size)

        # Generic MLP init first, then rescale the trunk weights by the
        # leaky-ReLU gain.
        # NOTE(review): calculate_gain is called without a slope argument
        # while the activations above use 0.1 -- confirm this is intended.
        self.apply(weights_init_mlp)
        gain = nn.init.calculate_gain('leaky_relu')
        for fc in (self.fc1, self.fc2, self.fc3, self.fc4):
            fc.weight.data.mul_(gain)

        # Heads get column-normalised weights (small scale for the actor
        # heads, unit scale for the critic) and zero biases.
        head_scales = (
            (self.actor_linear, 0.01),
            (self.actor_linear2, 0.01),
            (self.critic_linear, 1.0),
        )
        for head, scale in head_scales:
            head.weight.data = norm_col_init(head.weight.data, scale)
            head.bias.data.fill_(0)

        # Both LSTM bias vectors start at zero.
        for lstm_bias in (self.lstm.bias_ih, self.lstm.bias_hh):
            lstm_bias.data.fill_(0)

        self.train()
Пример #5
0
    def __init__(self, observation_space, action_space, n_frames, args):
        """Assemble a fully convolutional network with convolutional heads.

        Args:
            observation_space: Kept on the instance unchanged; the network
                input shape is taken from the ``senc_NNGrid`` encoder.
            action_space: Object exposing ``.shape``; also handed to
                ``adec_NNGrid``.
            n_frames: Frame count given to ``FrameStack``; it scales the
                encoder's leading shape entry into conv1's input channels.
            args: Forwarded as-is to ``senc_NNGrid`` and ``adec_NNGrid``.
        """
        super(ActorCritic, self).__init__()

        # Helpers that preprocess raw states.
        self.senc_nngrid = senc_NNGrid(args)
        self.frame_stack = FrameStack(n_frames)

        # Helper that postprocesses network outputs into actions.
        self.adec_nngrid = adec_NNGrid(action_space, args)

        self.observation_space = observation_space
        self.action_space = action_space

        self.input_size = self.senc_nngrid.observation_space.shape
        self.output_size = int(np.prod(self.action_space.shape))

        # Trunk: four 3x3 convolutions with stride 1 and padding 1, all
        # 128 channels wide.
        width = 128
        stacked_channels = self.frame_stack.n_frames * self.input_size[0]
        self.conv1 = nn.Conv2d(
            stacked_channels, width, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(
            width, width, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(
            width, width, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(
            width, width, kernel_size=3, stride=1, padding=1)

        # Heads are 3x3 convolutions as well, each emitting two channels.
        self.critic_linear = nn.Conv2d(
            width, 2, kernel_size=3, stride=1, padding=1)
        self.actor_linear = nn.Conv2d(
            width, 2, kernel_size=3, stride=1, padding=1)
        self.actor_linear2 = nn.Conv2d(
            width, 2, kernel_size=3, stride=1, padding=1)

        # Generic init first, then rescale the trunk weights by the
        # leaky-ReLU gain.
        self.apply(weights_init)
        gain = nn.init.calculate_gain('leaky_relu')
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            conv.weight.data.mul_(gain)

        # Heads get column-normalised weights (small scale for the actor
        # heads, unit scale for the critic) and zero biases.
        head_scales = (
            (self.actor_linear, 0.01),
            (self.actor_linear2, 0.01),
            (self.critic_linear, 1.0),
        )
        for head, scale in head_scales:
            head.weight.data = norm_col_init(head.weight.data, scale)
            head.bias.data.fill_(0)

        self.train()
Пример #6
0
    def __init__(self, observation_space, action_space, n_frames, args):
        """Assemble a ConvLSTM stack with learnable initial states.

        Args:
            observation_space: Kept on the instance unchanged; the network
                input shape is taken from the ``senc_NNGrid`` encoder.
            action_space: Object exposing ``.shape``; also handed to
                ``adec_NNGrid``.
            n_frames: Frame count given to ``FrameStack``; it also scales
                the encoder's leading shape entry into the first layer's
                input channels.
            args: Forwarded as-is to ``senc_NNGrid`` and ``adec_NNGrid``.
        """
        super(ActorCritic, self).__init__()

        # Helpers that preprocess raw states; the encoder's grid anchor is
        # captured as it stands at construction time.
        self.senc_nngrid = senc_NNGrid(args)
        self.frame_stack = FrameStack(n_frames)
        self.old_anchor = (
            self.senc_nngrid.grid_anchor_x,
            self.senc_nngrid.grid_anchor_y,
        )

        # Helper that postprocesses network outputs into actions.
        self.adec_nngrid = adec_NNGrid(action_space, args)

        self.observation_space = observation_space
        self.action_space = action_space

        self.input_size = self.senc_nngrid.observation_space.shape
        self.output_size = int(np.prod(self.action_space.shape))

        # Output channel count of each ConvLSTM layer, in order.
        hidden_channels = [32, 64, 128, 128]
        stacked_channels = self.frame_stack.n_frames * self.input_size[0]
        self.convlstm1 = ConvLSTM(stacked_channels, 32, 3, stride=1, padding=1)
        self.convlstm2 = ConvLSTM(32, 64, 3, stride=1, padding=1)
        self.convlstm3 = ConvLSTM(64, 128, 3, stride=1, padding=1)
        self.convlstm4 = ConvLSTM(128, 128, 3, stride=1, padding=1)
        self.convlstm = [
            self.convlstm1, self.convlstm2, self.convlstm3, self.convlstm4
        ]

        # TODO(eparisot): add code that initializes new gridcells with
        # learnable parameter vector
        self.convh0 = []
        self.convc0 = []
        self.memsizes = []
        # Walk the stack once: record each layer's (channels, *spatial)
        # output shape and create zero-initialised h0 / c0 parameters of
        # that shape with a leading dimension of 1.
        layer_shape = (n_frames * self.input_size[0], ) + self.input_size[1:]
        for layer, channels in zip(self.convlstm, hidden_channels):
            spatial = layer._spatial_size_output_given_input(
                (1, ) + layer_shape)
            layer_shape = (channels, ) + spatial
            mem_shape = copy.deepcopy(layer_shape)
            self.memsizes.append(mem_shape)
            self.convh0.append(nn.Parameter(torch.zeros((1, ) + mem_shape)))
            self.convc0.append(nn.Parameter(torch.zeros((1, ) + mem_shape)))
        # Plain lists are not tracked by nn.Module, so the same parameters
        # are also held in ParameterLists to register them.
        self._convh0_module = nn.ParameterList(self.convh0)
        self._convc0_module = nn.ParameterList(self.convc0)

        self.critic_linear = nn.Conv2d(
            128, 2, kernel_size=3, stride=1, padding=1)
        self.actor_linear = nn.Conv2d(
            128, 2, kernel_size=3, stride=1, padding=1)
        self.actor_linear2 = nn.Conv2d(
            128, 2, kernel_size=3, stride=1, padding=1)

        # Heads get column-normalised weights (small scale for the actor
        # heads, unit scale for the critic) and zero biases.
        head_scales = (
            (self.actor_linear, 0.01),
            (self.actor_linear2, 0.01),
            (self.critic_linear, 1.0),
        )
        for head, scale in head_scales:
            head.weight.data = norm_col_init(head.weight.data, scale)
            head.bias.data.fill_(0)

        self.train()