def forward_with_reality(self, intput, time_len):
    """Encode z / u from the ground-truth frames I_{t+T} without resampling."""
    vid_batch = vid_batch_to_cuda(intput)
    # Feature extraction ('appr' | 'bbox').
    vid_batch = self._forward_image_encode(vid_batch)
    # Posterior without sampling (f1, f2); the KL term and raw z are unused here.
    img_z, _kl_loss, _ori_z = self.encoder.no_sample(vid_batch)
    return self._forward_with_z(vid_batch, img_z, time_len)
def forward_inception(self, intput, time_len, seed=None):
    """Sample z / u from a distribution and roll the model forward."""
    vid_batch = vid_batch_to_cuda(intput)
    # Feature extraction ('appr' | 'bbox').
    vid_batch = self._forward_image_encode(vid_batch)
    # V is the second dimension of 'bbox' (presumably objects per frame —
    # TODO confirm against the encoder's expectations).
    num_entities = vid_batch['bbox'].size(1)
    img_z = self.encoder.batch_sample(
        num_entities, time_len, seed, vid_batch['image'][0])
    return self._forward_with_z(vid_batch, img_z, time_len)
def forward(self, in_vecs):
    """Main forward pass: encode, infer z from the clip, roll out self.dt steps."""
    vid_batch = vid_batch_to_cuda(in_vecs)
    # 1. Feature extraction: 'feats' = 'appr' | 'bbox'
    vid_batch = self._forward_image_encode(vid_batch)
    # 2. Latent z, KL loss — conditioned only on I_0 and I_{dt-1}.
    img_z, kl_loss, long_u = self.encoder(vid_batch)
    preds = self._forward_with_z(vid_batch, img_z, self.dt)
    preds.update(kl_loss=kl_loss, orig_z=long_u)
    return preds
def forward_lp(self, intput, time_len, sample):
    """Autoregressive rollout: at each step, sample a latent conditioned on
    the previous frame's reconstruction, predict the next features with the
    graph net, and decode. Also reconstructs the real sequence under
    'real_*' keys for supervision.

    todo: clean!
    """
    vid_batch = vid_batch_to_cuda(intput)
    # 1. 'feats' = 'appr' | 'bbox', with image_tower AND factorize
    vid_batch = self._forward_image_encode(vid_batch)
    V = vid_batch['image'].size(1)
    bg_feat = self.get_bg_feat(vid_batch['bg_feat'], 0)
    predictions = []
    # Seed the rollout with frame 0; the "previous reconstruction" fed to the
    # encoder on the first step is the real frame 1.
    cur_frame = self.filter_time_stamp(0, vid_batch)
    cur_frame['pred_recon'] = vid_batch['image'][1]  # reality
    trip = self._next_trip(cur_frame['bbox'], cur_frame['trip'])
    self.encoder.init_hidden(vid_batch)
    for t in range(time_len):
        img_z, kl_loss, orig_z = self.encoder.one_sample(
            cur_frame['pred_recon'], sample)
        # img_z: dt=1, V, D -> (V, O, D)
        obj_z = img_z[0].unsqueeze(1)
        # Optionally block gradients through the graph net on the first step only.
        stop = True if (self.stop_grad and t == 0) else False
        feat_pred = self.graph_net(cur_frame['appr'], cur_frame['bbox'],
                                   obj_z, trip, stop)
        feat_pred['appr'] = self._apply_feat_constraint(feat_pred['appr'])
        # NOTE(review): the decoder is fed the CURRENT frame's features, so
        # 'pred_recon' stored on feat_pred lags the graph-net prediction by
        # one step — presumably intentional (it conditions the next encoder
        # call); confirm before "fixing".
        out = self.decoder(self._unsqz(cur_frame['appr'], 0),
                           self._unsqz(cur_frame['bbox'], 0),
                           self.decoder_target_size, bg_feat)
        feat_pred['pred_recon'] = out['recon'].squeeze(0)
        cur_frame.update(feat_pred)
        predictions.append(feat_pred.copy())
    predictions = collate_batch(predictions, time_len, V)
    # Reconstruction of the real (ground-truth) sequence, prefixed 'real_'.
    src_recon = self.decoder(vid_batch['appr'], vid_batch['bbox'],
                             self.decoder_target_size, bg_feat)
    for key in src_recon:
        new_key = 'real_' + key
        predictions[new_key] = src_recon[key]
    return predictions
def forward_with_reality(self, intput, time_len):
    """Encode z / u with ground truth I_{t+T} without resampling.

    The first ``self.show_length`` conditioning frames are stripped from the
    batch before prediction, so the rollout covers the remaining
    ``time_len - self.show_length`` steps.
    """
    vid_batch = vid_batch_to_cuda(intput)
    vid_batch = self._forward_image_encode(vid_batch)
    # 2. get u, kl_loss, using f1, f2 (no resampling); KL and raw z unused here.
    img_z, _kl_loss, _ori_z, src_feats = self.encoder.no_sample(vid_batch)
    # Drop the conditioning prefix. The original hard-coded 3 here while
    # subtracting self.show_length below; use show_length for both so the two
    # cannot drift apart (identical behavior when show_length == 3).
    s = self.show_length
    for k in vid_batch.keys():
        if k == 'bg_feat':
            # bg_feat is a list; trim each entry independently.
            for i in range(len(vid_batch[k])):
                vid_batch[k][i] = vid_batch[k][i][s:]
        elif k == 'index':
            # NOTE(review): assumes 3 'index' entries per frame, mirroring the
            # original `3 * 3` — confirm against the dataset layout.
            vid_batch[k] = vid_batch[k][s * 3:]
        else:
            vid_batch[k] = vid_batch[k][s:]
    predictions = self._forward_with_z(vid_batch, img_z, time_len - s)
    # isinstance instead of type(...) == dict (idiomatic type check).
    assert isinstance(predictions, dict)
    predictions['src_feats'] = src_feats
    return predictions
def forward(self, in_vecs):
    """Main forward pass: encode the clip, infer z, then predict the frames
    that follow the ``self.show_length`` conditioning prefix.
    """
    vid_batch = vid_batch_to_cuda(in_vecs)
    # 1. Feature Extraction: 'feats' = 'appr' | 'bbox'
    vid_batch = self._forward_image_encode(vid_batch)
    # 2. get z, kl_loss, using only I_0, I_dt-1 (src_feats unused here).
    img_z, kl_loss, long_u, _src_feats = self.encoder(vid_batch)
    # Drop the conditioning prefix before the graph neural network. The
    # original hard-coded 3 while subtracting self.show_length below; use
    # show_length for both so they stay consistent (identical behavior when
    # show_length == 3).
    s = self.show_length
    for k in vid_batch.keys():
        if k == 'bg_feat':
            # bg_feat is a list; trim each entry independently.
            for i in range(len(vid_batch[k])):
                vid_batch[k][i] = vid_batch[k][i][s:]
        elif k == 'index':
            # NOTE(review): assumes 3 'index' entries per frame, mirroring the
            # original `3 * 3` — confirm against the dataset layout.
            vid_batch[k] = vid_batch[k][s * 3:]
        else:
            vid_batch[k] = vid_batch[k][s:]
    preds = self._forward_with_z(vid_batch, img_z, self.dt - s)
    preds['kl_loss'] = kl_loss
    preds['orig_z'] = long_u
    return preds