def __init__(self, x_size, resnet_hidden_size, processed_x_size, b_size, z_size, layers, samples_per_seq,
             t_diff_min, t_diff_max, t_diff_max_poss=10, action_space=0, action_dim=8, rl=False):
    super().__init__()
    self.layers = layers
    self.samples_per_seq = samples_per_seq
    self.t_diff_min = t_diff_min
    self.t_diff_max = t_diff_max
    self.t_diff_max_poss = t_diff_max_poss
    self.x_size = x_size
    self.processed_x_size = processed_x_size
    self.b_size = b_size
    self.z_size = z_size
    self.rl = rl

    # Input pre-process layer
    self.process_x = MinigridEncoder(x_size, resnet_hidden_size, processed_x_size)

    # Multilayer LSTM for aggregating belief states
    self.b_rnn = ops.MultilayerLSTM(input_size=processed_x_size + action_dim + 1, hidden_size=b_size,
                                    layers=layers, every_layer_input=True, use_previous_higher=True)

    # Multilayer state model is used. Sampling is done by sampling higher layers first.
    self.z_b = nn.ModuleList([
        DBlock(b_size + (z_size if layer < layers - 1 else 0), layers * b_size, z_size)
        for layer in range(layers)
    ])

    # Given belief and state at time t2, infer the state at time t1
    self.z1_z2_b = nn.ModuleList([
        DBlock(b_size + layers * z_size + (z_size if layer < layers - 1 else 0) + t_diff_max_poss,
               layers * b_size, z_size)
        for layer in range(layers)
    ])

    # Given the state at time t1, model state at time t2 through state transition
    self.z2_z1 = nn.ModuleList([
        DBlock(layers * z_size + action_dim + (z_size if layer < layers - 1 else 0) + t_diff_max_poss,
               layers * b_size, z_size)
        for layer in range(layers)
    ])

    # state to observation
    self.x_z = MinigridDecoder(x_size, z_dim=layers * z_size, d_hidden=resnet_hidden_size)

    # state to Q value per action
    if rl:
        self.g_z = ReturnsDecoder((layers * z_size * 2) + action_dim + t_diff_max_poss, layers * b_size)
        self.q_z = QNetwork(layers * z_size, layers * b_size, action_space)
        self.action_embedding = nn.Embedding(action_space, action_dim)

    self.time_encoding = torch.zeros(t_diff_max_poss, t_diff_max_poss)
    for i in range(t_diff_max_poss):
        self.time_encoding[i, :i + 1] = 1
    self.time_encoding = nn.Parameter(self.time_encoding.float(), requires_grad=False)
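# A minimal, standalone sketch (not part of the class above) of how the time encoding
# built in this __init__ behaves: it is a cumulative "thermometer" code in which row i
# has its first i + 1 entries set to 1. The concrete value t_diff_max_poss = 4 below is
# chosen only for illustration.
import torch

t_diff_max_poss = 4
time_encoding = torch.zeros(t_diff_max_poss, t_diff_max_poss)
for i in range(t_diff_max_poss):
    time_encoding[i, :i + 1] = 1
# time_encoding is now:
# tensor([[1., 0., 0., 0.],
#         [1., 1., 0., 0.],
#         [1., 1., 1., 0.],
#         [1., 1., 1., 1.]])
# Rows are looked up by (zero-indexed) time difference and concatenated onto the inputs
# of z1_z2_b, z2_z1 and g_z, which is why those blocks add t_diff_max_poss to their
# input sizes.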
def __init__(self, x_size, processed_x_size, b_size, z_size, layers, samples_per_seq, t_diff_min, t_diff_max):
    super().__init__()
    self.layers = layers
    self.samples_per_seq = samples_per_seq
    self.t_diff_min = t_diff_min
    self.t_diff_max = t_diff_max
    self.x_size = x_size
    self.processed_x_size = processed_x_size
    self.b_size = b_size
    self.z_size = z_size

    # Input pre-process layer
    self.process_x = PreProcess(x_size, processed_x_size)

    # Multilayer LSTM for aggregating belief states
    self.b_rnn = ops.MultilayerLSTM(input_size=processed_x_size, hidden_size=b_size, layers=layers,
                                    every_layer_input=True, use_previous_higher=True)

    # Multilayer state model is used. Sampling is done by sampling higher layers first.
    self.z_b = nn.ModuleList([
        DBlock(b_size + (z_size if layer < layers - 1 else 0), 50, z_size)
        for layer in range(layers)
    ])

    # Given belief and state at time t2, infer the state at time t1
    self.z1_z2_b1 = nn.ModuleList([
        DBlock(b_size + layers * z_size + (z_size if layer < layers - 1 else 0), 50, z_size)
        for layer in range(layers)
    ])

    # Given the state at time t1, model state at time t2 through state transition
    self.z2_z1 = nn.ModuleList([
        DBlock(layers * z_size + (z_size if layer < layers - 1 else 0), 50, z_size)
        for layer in range(layers)
    ])

    # state to observation
    self.x_z = Decoder(layers * z_size, 200, x_size)
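# The DBlock, PreProcess, Decoder and ops.MultilayerLSTM modules used above are defined
# elsewhere in the repo. As a reading aid only, here is a minimal sketch of what a
# Gaussian-parameter block with this (input, hidden, output) signature could look like,
# loosely following the TD-VAE paper's D block (a gated hidden layer followed by
# mu/logsigma heads); the actual DBlock implementation may differ.
import torch
import torch.nn as nn


class DBlockSketch(nn.Module):
    """Maps an input vector to the mean and log-std of a diagonal Gaussian."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(input_size, hidden_size)
        self.fc_mu = nn.Linear(hidden_size, output_size)
        self.fc_logsigma = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h = torch.tanh(self.fc1(x)) * torch.sigmoid(self.fc2(x))  # gated hidden activation
        return self.fc_mu(h), self.fc_logsigma(h)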
def __init__(self, x_size, resnet_hidden_size, processed_x_size, b_size, layers, samples_per_seq,
             action_space=0, action_dim=8, rl=False):
    super().__init__()
    self.layers = layers
    self.samples_per_seq = samples_per_seq
    self.x_size = x_size
    self.processed_x_size = processed_x_size
    self.b_size = b_size
    self.rl = rl

    # Input pre-process layer
    if len(x_size) > 1:
        self.process_x = ConvPreProcess(x_size, resnet_hidden_size, processed_x_size)
    else:
        self.process_x = PreProcess(x_size, processed_x_size)

    # Multilayer LSTM for aggregating belief states
    self.b_rnn = ops.MultilayerLSTM(input_size=processed_x_size + action_dim + 1, hidden_size=b_size,
                                    layers=layers, every_layer_input=True, use_previous_higher=True)

    # state to Q value per action
    if rl:
        self.q_z = QNetwork(layers * b_size, layers * b_size, action_space)
        self.action_embedding = nn.Embedding(action_space, action_dim)
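# A hedged sketch of why the b_rnn input size is processed_x_size + action_dim + 1:
# the per-step input is presumably the processed observation concatenated with the
# embedded previous action and a scalar reward. The tensor names and toy sizes below
# are hypothetical and for illustration only.
import torch
import torch.nn as nn

batch, seq_len = 2, 5
processed_x_size, action_dim, n_actions = 16, 8, 4
processed_x = torch.randn(batch, seq_len, processed_x_size)   # output of self.process_x
actions = torch.randint(0, n_actions, (batch, seq_len))       # discrete action indices
rewards = torch.randn(batch, seq_len, 1)                      # one scalar per step
action_emb = nn.Embedding(n_actions, action_dim)(actions)     # mirrors self.action_embedding
rnn_input = torch.cat([processed_x, action_emb, rewards], dim=-1)
assert rnn_input.shape[-1] == processed_x_size + action_dim + 1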
def __init__(self, x_size, resnet_hidden_size, processed_x_size, b_size, z_size, layers, samples_per_seq,
             t_diff_min, t_diff_max, t_diff_max_poss=10, action_space=0, action_dim=8, rl=False):
    super().__init__()
    self.layers = layers
    self.samples_per_seq = samples_per_seq
    self.t_diff_min = t_diff_min
    self.t_diff_max = t_diff_max
    self.t_diff_max_poss = t_diff_max_poss
    self.x_size = x_size
    self.processed_x_size = processed_x_size
    self.b_size = b_size
    self.z_size = z_size
    self.rl = rl

    # Input pre-process layer
    if len(x_size) > 1:
        self.process_x = ConvPreProcess(x_size, resnet_hidden_size, processed_x_size)
    else:
        self.process_x = PreProcess(x_size, processed_x_size)

    # Multilayer LSTM for aggregating belief states
    self.b_rnn = ops.MultilayerLSTM(input_size=processed_x_size + action_dim + 1, hidden_size=b_size,
                                    layers=layers, every_layer_input=True, use_previous_higher=True)

    # Multilayer state model is used. Sampling is done by sampling higher layers first.
    self.z_b = nn.ModuleList([
        DBlock(b_size + (z_size if layer < layers - 1 else 0), layers * b_size, z_size)
        for layer in range(layers)
    ])

    # Given belief and state at time t2, infer the state at time t1
    self.z1_z2_b = nn.ModuleList([
        DBlock(b_size + layers * z_size + (z_size if layer < layers - 1 else 0) + (t_diff_max_poss - t_diff_min),
               layers * b_size, z_size)
        for layer in range(layers)
    ])

    # Given the state at time t1, model state at time t2 through state transition
    self.z2_z1 = nn.ModuleList([
        DBlock(layers * z_size + action_dim + (z_size if layer < layers - 1 else 0) + (t_diff_max_poss - t_diff_min),
               layers * b_size, z_size)
        for layer in range(layers)
    ])

    # state to observation
    # self.x_z = ConvDecoder(layers * z_size, resnet_hidden_size, x_size)
    self.x_z = SAGANGenerator(x_size, z_dim=layers * z_size, d_hidden=resnet_hidden_size)

    # state to Q value per action
    if rl:
        self.g_z = ReturnsDecoder((layers * z_size * 2) + action_dim + (t_diff_max_poss - t_diff_min),
                                  layers * b_size)
        self.q_z = QNetwork(layers * z_size, layers * b_size, action_space)
        self.action_embedding = nn.Embedding(action_space, action_dim)

    self.time_encoding = nn.Embedding(t_diff_max_poss - t_diff_min + 1, t_diff_max_poss - t_diff_min)
    for param in self.time_encoding.parameters():
        param.requires_grad = False
        param.zero_()
    for i in range(t_diff_max_poss - t_diff_min + 1):
        self.time_encoding.weight[i, :i] = 1.0
    self.time_encoding_scale = nn.Parameter(torch.ones(1))
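# A standalone sketch (separate from the class above) of the frozen time-encoding
# embedding constructed in this __init__: it maps a shifted time difference
# (t2 - t1 - t_diff_min) to a cumulative 0/1 code of length t_diff_max_poss - t_diff_min.
# The values t_diff_min = 1 and t_diff_max_poss = 5 are chosen only for illustration.
import torch
import torch.nn as nn

t_diff_min, t_diff_max_poss = 1, 5
num_codes = t_diff_max_poss - t_diff_min + 1
code_len = t_diff_max_poss - t_diff_min
time_encoding = nn.Embedding(num_codes, code_len)
for param in time_encoding.parameters():
    param.requires_grad = False
    param.zero_()
for i in range(num_codes):
    time_encoding.weight[i, :i] = 1.0
# time_encoding.weight is now:
# tensor([[0., 0., 0., 0.],
#         [1., 0., 0., 0.],
#         [1., 1., 0., 0.],
#         [1., 1., 1., 0.],
#         [1., 1., 1., 1.]])
shifted_t_diff = torch.tensor([2])      # e.g. t2 - t1 = 3 with t_diff_min = 1
print(time_encoding(shifted_t_diff))    # -> tensor([[1., 1., 0., 0.]])
# time_encoding_scale in the class above is a learnable scalar, presumably used to
# rescale this code before it is concatenated onto the DBlock inputs.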