import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Repo-internal helpers (Linear, ConvEncoder, ConvDecoder, SpatialSoftmax,
# AttrDict, BaseModel, SingleTempDistClassifier, select_indices) are assumed
# to be imported from this project's own modules.


class ActorNetwork(torch.nn.Module):

    def __init__(self, hp):
        super().__init__()
        self._hp = hp
        if self._hp.low_dim:
            # Low-dimensional input: (state, goal) concatenated along the feature axis.
            self.encoder = Linear(in_dim=self._hp.state_size * 2, out_dim=128,
                                  builder=self._hp.builder)
            out_size = 128
        else:
            self.encoder = ConvEncoder(self._hp)
            # Assumes the encoder produces a 5x5 spatial feature map.
            out_size = self.encoder.get_output_size()[0] * 5 * 5

        self.mlp = torch.nn.Sequential()
        self.mlp.add_module('linear_1',
                            Linear(in_dim=out_size, out_dim=128, builder=self._hp.builder))
        # NOTE: as written there is no activation between linear_1 and linear_2,
        # so the first two layers compose into a single affine map; preserved as-is.
        for i in range(10):
            self.mlp.add_module(f'linear_{i + 2}',
                                Linear(in_dim=128, out_dim=128, builder=self._hp.builder))
            self.mlp.add_module(f'relu_{i + 2}', torch.nn.ReLU())
        self.mlp.add_module('linear_final',
                            Linear(in_dim=128, out_dim=self._hp.action_size,
                                   builder=self._hp.builder))
        # tanh squashes actions into [-1, 1].
        self.mlp.add_module('tanh', torch.nn.Tanh())

    def forward(self, image_pairs):
        embeddings = self.encoder(image_pairs).reshape(image_pairs.size(0), -1)
        return self.mlp(embeddings)
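# Hedged usage sketch (not from the original source): the field names on `hp`
# below are inferred from how ActorNetwork reads them, the numeric values are
# illustrative only, and `hp.builder` is the project's layer builder, left as
# a placeholder.
#
#   hp = AttrDict(low_dim=True, state_size=9, action_size=2, builder=...)
#   actor = ActorNetwork(hp)
#   state_goal = torch.randn(32, hp.state_size * 2)  # batch of (state, goal) pairs
#   actions = actor(state_goal)                      # shape (32, 2), in [-1, 1]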
class VAE(torch.nn.Module):

    def __init__(self, hp):
        super().__init__()
        self._hp = hp
        self.encoder = ConvEncoder(self._hp)
        out_size = self.encoder.get_output_size()
        out_flat_size = out_size[0] * out_size[1] * out_size[2]
        self.linear1 = Linear(in_dim=out_flat_size, out_dim=128, builder=self._hp.builder)
        # linear2 outputs the concatenation [mu, logvar], hence hidden_size * 2.
        self.linear2 = Linear(in_dim=128, out_dim=self._hp.hidden_size * 2,
                              builder=self._hp.builder)
        self.linear3 = Linear(in_dim=self._hp.hidden_size, out_dim=128,
                              builder=self._hp.builder)
        self.linear4 = Linear(in_dim=128, out_dim=out_flat_size, builder=self._hp.builder)
        self.decoder = ConvDecoder(self._hp)

    def reparameterize(self, mu, logvar):
        # Reparameterization trick: z = mu + sigma * eps with eps ~ N(0, I).
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def encode(self, image):
        embeddings = self.encoder(image).reshape(image.size(0), -1)
        e = F.relu(self.linear1(embeddings))
        z = self.linear2(e)
        mu, logvar = z[:, :self._hp.hidden_size], z[:, self._hp.hidden_size:]
        return mu, logvar

    def decode(self, z):
        e = F.relu(self.linear3(z))
        e = F.relu(self.linear4(e))
        e = e.view(*([e.size(0)] + list(self.encoder.get_output_size())))
        im = torch.sigmoid(self.decoder(e))  # torch.sigmoid: F.sigmoid is deprecated
        return im

    def forward(self, image):
        mu, logvar = self.encode(image)
        z = self.reparameterize(mu, logvar)
        im = self.decode(z)
        return mu, logvar, z, im
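# Hedged sketch (not part of the original file): the VAE above returns
# (mu, logvar, z, im) but defines no objective. A standard ELBO loss for this
# architecture would look like the following; `beta` is a hypothetical
# KL-weighting hyperparameter.
def vae_elbo_loss(mu, logvar, reconstruction, target, beta=1.0):
    # Reconstruction term; BCE matches the sigmoid output head above.
    recon = F.binary_cross_entropy(reconstruction, target, reduction='sum')
    # Analytic KL divergence between N(mu, sigma^2) and the N(0, I) prior.
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon + beta * kld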
class QNetwork(torch.nn.Module):

    def __init__(self, hp, num_outputs):
        super().__init__()
        self._hp = hp
        self.num_outputs = num_outputs
        self.ll_size = ll_size = self._hp.linear_layer_size
        self.fc_layers = torch.nn.ModuleList()
        if self._hp.low_dim:
            self.linear1 = Linear(in_dim=2 * self._hp.state_size, out_dim=ll_size,
                                  builder=self._hp.builder)
        else:
            self.encoder = ConvEncoder(self._hp)
            out_size = self.encoder.get_output_size()
            self.linear1 = Linear(in_dim=out_size[0] * out_size[1] * out_size[2],
                                  out_dim=ll_size, builder=self._hp.builder)
        # Actions are concatenated to the embedding after the first layer.
        self.linear2 = Linear(in_dim=ll_size + self._hp.action_size, out_dim=ll_size,
                              builder=self._hp.builder)
        for i in range(3):
            self.fc_layers.append(Linear(in_dim=ll_size, out_dim=ll_size,
                                         builder=self._hp.builder))
        self.fc_layers.append(Linear(in_dim=ll_size, out_dim=self.num_outputs,
                                     builder=self._hp.builder))

    def forward(self, image_pairs, actions):
        if self._hp.low_dim:
            embeddings = image_pairs
        else:
            embeddings = self.encoder(image_pairs).reshape(image_pairs.size(0), -1)
        x = F.relu(self.linear1(embeddings))
        x = torch.cat([x, actions], dim=1)
        x = F.relu(self.linear2(x))
        # NOTE: as written, the ReLU is also applied after the final layer,
        # so the network's outputs are non-negative; preserved as-is.
        for layer in self.fc_layers:
            x = F.relu(layer(x))
        return x
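# Hedged usage sketch (not from the original source): field names on `hp` are
# inferred from the constructor; the numeric values and `hp.builder` are
# placeholders only.
#
#   hp = AttrDict(low_dim=True, state_size=9, action_size=2,
#                 linear_layer_size=256, builder=...)
#   qnet = QNetwork(hp, num_outputs=1)
#   q = qnet(torch.randn(32, 2 * hp.state_size),   # (state, goal) pairs
#            torch.randn(32, hp.action_size))      # candidate actions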
# NOTE: a second, shallower QNetwork variant (presumably from a separate file
# or experiment); it shadows the class above if both are defined in one module.
class QNetwork(torch.nn.Module):

    def __init__(self, hp):
        super().__init__()
        self._hp = hp
        if self._hp.low_dim:
            self.linear1 = Linear(in_dim=4, out_dim=128, builder=self._hp.builder)
        else:
            self.encoder = ConvEncoder(self._hp)
            out_size = self.encoder.get_output_size()
            # Hard-coded to a 64-channel, 5x5 encoder output map.
            self.linear1 = Linear(in_dim=64 * 5 * 5, out_dim=128, builder=self._hp.builder)
        self.linear2 = Linear(in_dim=128 + self._hp.action_size, out_dim=1,
                              builder=self._hp.builder)
        # A deeper head (linear3..linear6, 128 units each, ending in out_dim=1)
        # was present but commented out in the original.

    def forward(self, image_pairs, actions):
        if self._hp.low_dim:
            embeddings = image_pairs
        else:
            embeddings = self.encoder(image_pairs).view(image_pairs.size(0), -1)
        e = F.relu(self.linear1(embeddings))
        e = torch.cat([e, actions], dim=1)
        qvalue = self.linear2(e)
        return qvalue
class TempdistRegressor(BaseModel):

    def __init__(self, overrideparams, logger=None):
        super().__init__(logger)
        self._hp = self._default_hparams()
        self.overrideparams = overrideparams
        self.override_defaults(overrideparams)  # override defaults with the config file
        self.postprocess_params()
        assert self._hp.batch_size != -1  # make sure the batch size was overridden
        self.tdist_classifiers = []
        self.build_network()

    def _default_hparams(self):
        default_dict = AttrDict({
            'tmax_label': 10,  # temporal-distance labels are clamped to this maximum
            'use_skips': False,  # TODO: try a ResNet architecture
            'ngf': 8,
            'nz_enc': 64,
            'classifier_restore_path': None,  # not needed here
        })
        # add the new params to the parent params
        parent_params = super()._default_hparams()
        for k in default_dict.keys():
            parent_params.add_hparam(k, default_dict[k])
        return parent_params

    @property
    def singletempdistclassifier(self):
        return SingleTempDistClassifier

    def build_network(self, build_encoder=True):
        self.encoder = ConvEncoder(self._hp)
        out_size = self.encoder.get_output_size()
        # SpatialSoftmax expects (height, width, channels).
        self.spatial_softmax = SpatialSoftmax(out_size[1], out_size[2], out_size[0])
        # The spatial softmax yields an (x, y) keypoint per channel, hence 2 * channels.
        self.linear = Linear(in_dim=out_size[0] * 2, out_dim=1, builder=self._hp.builder)

    def sample_image_pair(self, images):
        tlen = images.shape[1]
        # Sample ordered time indices t0 <= t1 from each trajectory in the batch.
        t0 = np.random.randint(0, tlen, self._hp.batch_size)
        t1 = np.array([np.random.randint(t0[b], tlen, 1)
                       for b in range(self._hp.batch_size)]).squeeze()
        t0, t1 = torch.from_numpy(t0), torch.from_numpy(t1)
        im_t0 = select_indices(images, t0)
        im_t1 = select_indices(images, t1)
        # The regression target is the temporal distance, clamped to tmax_label.
        self.labels = torch.clamp_max(t1 - t0, self._hp.tmax_label).type(torch.FloatTensor)
        return torch.stack([im_t0, im_t1], dim=1)

    def forward(self, inputs):
        """
        Forward pass at training time.
        :param inputs: contains demo_seq_images of shape batch x time x channel x height x width
        :return: model_output
        """
        image_pairs = self.sample_image_pair(inputs.demo_seq_images)
        self.img_pair = image_pairs
        model_output = self.make_prediction(image_pairs)
        return model_output

    def make_prediction(self, image_pairs_stacked):
        im_t0, im_t1 = image_pairs_stacked[:, 0], image_pairs_stacked[:, 1]
        embeddings = self.encoder(torch.cat([im_t0, im_t1], dim=1))
        embeddings = self.spatial_softmax(embeddings)
        self.tdist_estimates = self.linear(embeddings)
        return AttrDict(tdist_estimates=self.tdist_estimates,
                        img_pair=image_pairs_stacked)

    def _log_outputs(self, model_output, inputs, losses, step, log_images, phase):
        if log_images:
            self._logger.log_pair_predictions(self.img_pair, self.tdist_estimates,
                                              self.labels, 'tdist_regression', step, phase)

    def loss(self, model_output):
        losses = AttrDict()
        losses.mse = torch.nn.MSELoss()(model_output.tdist_estimates.squeeze(),
                                        self.labels.to(self._hp.device))
        # compute the total loss
        losses.total_loss = torch.stack(list(losses.values())).sum()
        return losses

    def get_device(self):
        return self._hp.device
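# Hedged illustration (not from the original source) of the regression target:
# for a trajectory of length tlen=20 with tmax_label=10, a sampled pair
# (t0=3, t1=18) yields label min(18 - 3, 10) = 10, while (t0=3, t1=7) yields
# label 4; the regressor is trained with MSE against these clamped distances.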
class SingleTempDistClassifier(BaseModel):

    def __init__(self, hp, tdist, logger):
        super().__init__(logger)
        self._hp = hp
        self.tdist = tdist
        self.build_network()

    def build_network(self, build_encoder=True):
        self.encoder = ConvEncoder(self._hp)
        out_size = self.encoder.get_output_size()
        # SpatialSoftmax expects (height, width, channels).
        self.spatial_softmax = SpatialSoftmax(out_size[1], out_size[2], out_size[0])
        self.linear = Linear(in_dim=out_size[0] * 2, out_dim=1, builder=self._hp.builder)
        self.cross_ent_loss = nn.BCEWithLogitsLoss()

    def forward(self, inputs):
        """
        Forward pass at training time.
        :param inputs: contains demo_seq_images of shape batch x time x channel x height x width
        :return: model_output
        """
        tlen = inputs.demo_seq_images.shape[1]
        pos_pairs, neg_pairs = self.sample_image_pair(inputs.demo_seq_images,
                                                      tlen, self.tdist)
        image_pairs = torch.cat([pos_pairs, neg_pairs], dim=0)
        embeddings = self.encoder(image_pairs)
        embeddings = self.spatial_softmax(embeddings)
        logits = self.linear(embeddings)
        self.out_sigmoid = torch.sigmoid(logits)
        return AttrDict(logits=logits, out_sigmoid=self.out_sigmoid,
                        pos_pair=self.pos_pair, neg_pair=self.neg_pair)

    def sample_image_pair(self, images, tlen, tdist):
        # Positives: pairs whose temporal distance is within (0, tdist].
        t0 = np.random.randint(0, tlen - tdist - 1, self._hp.batch_size)
        t1 = t0 + 1 + np.random.randint(0, tdist, self._hp.batch_size)
        t0, t1 = torch.from_numpy(t0), torch.from_numpy(t1)
        im_t0 = select_indices(images, t0)
        im_t1 = select_indices(images, t1)
        self.pos_pair = torch.stack([im_t0, im_t1], dim=1)
        pos_pair_cat = torch.cat([im_t0, im_t1], dim=1)

        # Negatives: pairs whose temporal distance is strictly greater than tdist.
        t0 = np.random.randint(0, tlen - tdist - 1, self._hp.batch_size)
        t1 = np.array([np.random.randint(t0[b] + tdist + 1, tlen, 1)
                       for b in range(self._hp.batch_size)]).squeeze()
        t0, t1 = torch.from_numpy(t0), torch.from_numpy(t1)
        im_t0 = select_indices(images, t0)
        im_t1 = select_indices(images, t1)
        self.neg_pair = torch.stack([im_t0, im_t1], dim=1)
        neg_pair_cat = torch.cat([im_t0, im_t1], dim=1)

        # Label 1 means within the tdist range, label 0 means outside of it.
        self.labels = torch.cat([torch.ones(self._hp.batch_size),
                                 torch.zeros(self._hp.batch_size)])
        return pos_pair_cat, neg_pair_cat

    def loss(self, model_output):
        logits_ = model_output.logits[:, 0]
        return self.cross_ent_loss(logits_, self.labels.to(self._hp.device))

    def _log_outputs(self, model_output, inputs, losses, step, log_images, phase):
        out_sigmoid = self.out_sigmoid.data.cpu().numpy().squeeze()
        predictions = np.zeros(out_sigmoid.shape)
        predictions[np.where(out_sigmoid > 0.5)] = 1
        labels = self.labels.data.cpu().numpy()
        num_neg = np.sum(labels == 0)
        false_positive_rate = np.sum(predictions[np.where(labels == 0)]) / float(num_neg)
        num_pos = np.sum(labels == 1)
        false_negative_rate = np.sum(1 - predictions[np.where(labels == 1)]) / float(num_pos)
        self._logger.log_scalar(false_positive_rate,
                                'tdist{}_false_positive_rate'.format(self.tdist),
                                step, phase)
        self._logger.log_scalar(false_negative_rate,
                                'tdist{}_false_negative_rate'.format(self.tdist),
                                step, phase)
        if log_images:
            self._logger.log_single_tdist_classifier_image(
                self.pos_pair, self.neg_pair, self.out_sigmoid,
                'tdist{}'.format(self.tdist), step, phase)
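# Hedged illustration (not from the original source) of the sampling scheme:
# with tlen=20 and tdist=5, a positive pair satisfies 1 <= t1 - t0 <= 5
# (label 1) and a negative pair satisfies t1 - t0 >= 6 (label 0), so the
# classifier learns whether two frames lie within tdist steps of each other.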