Example #1
    def __init__(self, x_size, resnet_hidden_size, processed_x_size, b_size, z_size, layers, samples_per_seq,
                 t_diff_min, t_diff_max, t_diff_max_poss=10, action_space=0, action_dim=8, rl=False):
        super().__init__()
        self.layers = layers
        self.samples_per_seq = samples_per_seq
        self.t_diff_min = t_diff_min
        self.t_diff_max = t_diff_max
        self.t_diff_max_poss = t_diff_max_poss

        self.x_size = x_size
        self.processed_x_size = processed_x_size
        self.b_size = b_size
        self.z_size = z_size

        self.rl = rl

        # Input pre-process layer
        self.process_x = MinigridEncoder(x_size, resnet_hidden_size, processed_x_size)

        # Multilayer LSTM for aggregating belief states
        self.b_rnn = ops.MultilayerLSTM(input_size=processed_x_size + action_dim + 1, hidden_size=b_size, layers=layers,
                                        every_layer_input=True, use_previous_higher=True)

        # Multilayer state model; sampling proceeds top-down, higher layers first.
        self.z_b = nn.ModuleList([DBlock(b_size + (z_size if layer < layers - 1 else 0), layers * b_size, z_size)
                                  for layer in range(layers)])

        # Given belief and state at time t2, infer the state at time t1
        self.z1_z2_b = nn.ModuleList([DBlock(b_size + layers * z_size + (z_size if layer < layers - 1 else 0)
                                             + t_diff_max_poss, layers * b_size, z_size)
                                      for layer in range(layers)])

        # Given the state at time t1, model the state at time t2 through the state transition
        self.z2_z1 = nn.ModuleList([DBlock(layers * z_size + action_dim +
                                           (z_size if layer < layers - 1 else 0) + t_diff_max_poss,
                                           layers * b_size, z_size)
                                    for layer in range(layers)])

        # state to observation
        self.x_z = MinigridDecoder(x_size, z_dim=layers * z_size, d_hidden=resnet_hidden_size)

        # state to Q value per action
        if rl:
            self.g_z = ReturnsDecoder((layers * z_size * 2) + action_dim + t_diff_max_poss, layers * b_size)
            self.q_z = QNetwork(layers * z_size, layers * b_size, action_space)

        self.action_embedding = nn.Embedding(action_space, action_dim)

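        # Fixed unary ("thermometer") time encoding: row i has its first
        # i + 1 entries set to one.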
        self.time_encoding = torch.zeros(t_diff_max_poss, t_diff_max_poss)
        for i in range(t_diff_max_poss):
            self.time_encoding[i, :i+1] = 1
        self.time_encoding = nn.Parameter(self.time_encoding.float(), requires_grad=False)
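
DBlock appears throughout this example but is not defined in the excerpt. The sketch below assumes the common TD-VAE-style definition, a gated two-layer block that outputs the mean and log-sigma of a diagonal Gaussian; only the (input, hidden, output) argument order is taken from the calls above.

import torch
import torch.nn as nn

class DBlock(nn.Module):
    """Maps its input to the mean and log-sigma of a diagonal Gaussian."""
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(input_size, hidden_size)
        self.fc_mu = nn.Linear(hidden_size, output_size)
        self.fc_logsigma = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Gated activation, as in TD-VAE reference implementations.
        t = torch.tanh(self.fc1(x)) * torch.sigmoid(self.fc2(x))
        return self.fc_mu(t), self.fc_logsigma(t)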
Example #2
    def __init__(self, x_size, processed_x_size, b_size, z_size, layers,
                 samples_per_seq, t_diff_min, t_diff_max):
        super().__init__()
        self.layers = layers
        self.samples_per_seq = samples_per_seq
        self.t_diff_min = t_diff_min
        self.t_diff_max = t_diff_max

        self.x_size = x_size
        self.processed_x_size = processed_x_size
        self.b_size = b_size
        self.z_size = z_size

        # Input pre-process layer
        self.process_x = PreProcess(x_size, processed_x_size)

        # Multilayer LSTM for aggregating belief states
        self.b_rnn = ops.MultilayerLSTM(input_size=processed_x_size,
                                        hidden_size=b_size,
                                        layers=layers,
                                        every_layer_input=True,
                                        use_previous_higher=True)

        # Multilayer state model; sampling proceeds top-down, higher layers first.
        self.z_b = nn.ModuleList([
            DBlock(b_size + (z_size if layer < layers - 1 else 0), 50, z_size)
            for layer in range(layers)
        ])

        # Given belief and state at time t2, infer the state at time t1
        self.z1_z2_b1 = nn.ModuleList([
            DBlock(
                b_size + layers * z_size +
                (z_size if layer < layers - 1 else 0), 50, z_size)
            for layer in range(layers)
        ])

        # Given the state at time t1, model the state at time t2 through the state transition
        self.z2_z1 = nn.ModuleList([
            DBlock(layers * z_size + (z_size if layer < layers - 1 else 0), 50,
                   z_size) for layer in range(layers)
        ])

        # state to observation
        self.x_z = Decoder(layers * z_size, 200, x_size)
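
The `z_size if layer < layers - 1 else 0` terms above mean that every DBlock below the top layer receives the sample from the layer above as an extra input, which is what "sampling higher layers first" refers to. A hypothetical sketch of that top-down ancestral pass (the function name and the assumption that `b` is a list of per-layer belief vectors are mine):

import torch

def sample_top_down(z_b, b):
    # Sample the top layer first; condition each lower DBlock on the
    # belief for that layer plus the sample from the layer above.
    layers = len(z_b)
    z = [None] * layers
    for layer in reversed(range(layers)):
        if layer == layers - 1:
            inp = b[layer]
        else:
            inp = torch.cat([b[layer], z[layer + 1]], dim=-1)
        mu, logsigma = z_b[layer](inp)
        z[layer] = mu + torch.exp(logsigma) * torch.randn_like(mu)
    return torch.cat(z, dim=-1)  # (batch, layers * z_size)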
Example #3
    def __init__(self,
                 x_size,
                 resnet_hidden_size,
                 processed_x_size,
                 b_size,
                 layers,
                 samples_per_seq,
                 action_space=0,
                 action_dim=8,
                 rl=False):
        super().__init__()
        self.layers = layers
        self.samples_per_seq = samples_per_seq

        self.x_size = x_size
        self.processed_x_size = processed_x_size
        self.b_size = b_size

        self.rl = rl

        # Input pre-process layer
        if len(x_size) > 1:
            self.process_x = ConvPreProcess(x_size, resnet_hidden_size,
                                            processed_x_size)
        else:
            self.process_x = PreProcess(x_size, processed_x_size)

        # Multilayer LSTM for aggregating belief states
        self.b_rnn = ops.MultilayerLSTM(input_size=processed_x_size +
                                        action_dim + 1,
                                        hidden_size=b_size,
                                        layers=layers,
                                        every_layer_input=True,
                                        use_previous_higher=True)

        # state to Q value per action
        if rl:
            self.q_z = QNetwork(layers * b_size, layers * b_size, action_space)

        self.action_embedding = nn.Embedding(action_space, action_dim)
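
The b_rnn input width is processed_x_size + action_dim + 1. The excerpt does not say what the extra scalar is (reward is a plausible guess), so the following assembly sketch, including the function name and shapes, is an assumption:

import torch

def make_rnn_input(process_x, action_embedding, x, actions, rewards):
    # x: (B, T, *x_size); actions: (B, T) long; rewards: (B, T) float.
    processed = process_x(x)             # (B, T, processed_x_size)
    a_emb = action_embedding(actions)    # (B, T, action_dim)
    # The trailing scalar column matches the "+ 1" in the LSTM input size.
    return torch.cat([processed, a_emb, rewards.unsqueeze(-1)], dim=-1)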
Example #4
    def __init__(self,
                 x_size,
                 resnet_hidden_size,
                 processed_x_size,
                 b_size,
                 z_size,
                 layers,
                 samples_per_seq,
                 t_diff_min,
                 t_diff_max,
                 t_diff_max_poss=10,
                 action_space=0,
                 action_dim=8,
                 rl=False):
        super().__init__()
        self.layers = layers
        self.samples_per_seq = samples_per_seq
        self.t_diff_min = t_diff_min
        self.t_diff_max = t_diff_max
        self.t_diff_max_poss = t_diff_max_poss

        self.x_size = x_size
        self.processed_x_size = processed_x_size
        self.b_size = b_size
        self.z_size = z_size

        self.rl = rl

        # Input pre-process layer
        if len(x_size) > 1:
            self.process_x = ConvPreProcess(x_size, resnet_hidden_size,
                                            processed_x_size)
        else:
            self.process_x = PreProcess(x_size, processed_x_size)

        # Multilayer LSTM for aggregating belief states
        self.b_rnn = ops.MultilayerLSTM(input_size=processed_x_size +
                                        action_dim + 1,
                                        hidden_size=b_size,
                                        layers=layers,
                                        every_layer_input=True,
                                        use_previous_higher=True)

        # Multilayer state model; sampling proceeds top-down, higher layers first.
        self.z_b = nn.ModuleList([
            DBlock(b_size + (z_size if layer < layers - 1 else 0),
                   layers * b_size, z_size) for layer in range(layers)
        ])

        # Given belief and state at time t2, infer the state at time t1
        self.z1_z2_b = nn.ModuleList([
            DBlock(
                b_size + layers * z_size +
                (z_size if layer < layers - 1 else 0) +
                (t_diff_max_poss - t_diff_min), layers * b_size, z_size)
            for layer in range(layers)
        ])

        # Given the state at time t1, model the state at time t2 through the state transition
        self.z2_z1 = nn.ModuleList([
            DBlock(
                layers * z_size + action_dim +
                (z_size if layer < layers - 1 else 0) +
                (t_diff_max_poss - t_diff_min), layers * b_size, z_size)
            for layer in range(layers)
        ])

        # state to observation
        # self.x_z = ConvDecoder(layers * z_size, resnet_hidden_size, x_size)
        self.x_z = SAGANGenerator(x_size,
                                  z_dim=layers * z_size,
                                  d_hidden=resnet_hidden_size)

        # state to Q value per action
        if rl:
            self.g_z = ReturnsDecoder((layers * z_size * 2) + action_dim +
                                      (t_diff_max_poss - t_diff_min),
                                      layers * b_size)
            self.q_z = QNetwork(layers * z_size, layers * b_size, action_space)

        self.action_embedding = nn.Embedding(action_space, action_dim)
        self.time_encoding = nn.Embedding(t_diff_max_poss - t_diff_min + 1,
                                          t_diff_max_poss - t_diff_min)
        for param in self.time_encoding.parameters():
            param.requires_grad = False
            param.zero_()
        for i in range(t_diff_max_poss - t_diff_min + 1):
            self.time_encoding.weight[i, :i] = 1.0
        self.time_encoding_scale = nn.Parameter(torch.ones(1))
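
For concreteness, a standalone sketch of the encoding built above, using hypothetical values t_diff_min=1 and t_diff_max_poss=4: the frozen embedding maps gap index i to a thermometer code with i leading ones, and time_encoding_scale rescales it during training.

import torch
import torch.nn as nn

t_diff_min, t_diff_max_poss = 1, 4        # hypothetical values
n = t_diff_max_poss - t_diff_min
time_encoding = nn.Embedding(n + 1, n)
for param in time_encoding.parameters():
    param.requires_grad = False
    param.zero_()
for i in range(n + 1):
    time_encoding.weight[i, :i] = 1.0
scale = nn.Parameter(torch.ones(1))

dt = torch.tensor([1, 2, 4])              # example gaps t2 - t1
print(time_encoding(dt - t_diff_min) * scale)
# tensor([[0., 0., 0.],
#         [1., 0., 0.],
#         [1., 1., 1.]], grad_fn=<MulBackward0>)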