class MapBatchFillMissing(MapTransformerBase):
    """Fills in map timesteps that have no fresh observation.

    Given a sparse batch of observed maps (``select_images``) and a per-timestep
    ``plan_mask`` saying which timesteps produced an observation, this module
    outputs one map per timestep: observed timesteps pass through unchanged,
    and unobserved timesteps reuse the most recent observation, re-projected
    into the current camera frame. At most one re-projection is applied to any
    map to avoid cascading interpolation errors.
    """

    def __init__(self, source_map_size, world_in_map_size):
        super(MapBatchFillMissing, self).__init__(source_map_size, world_in_map_size)
        self.map_size = source_map_size
        self.world_size = world_in_map_size
        # Used to re-project the last observation into the current frame.
        self.child_transformer = MapTransformerBase(source_map_size, world_in_map_size)
        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = MapTransformerBase(source_map_size, world_in_map_size)
        # Latest (map, pose) tuple seen so far in the current sequence.
        self.last_observation = None
        self.dbg_t = None
        self.seq = 0

    def init_weights(self):
        pass

    def reset(self):
        super(MapBatchFillMissing, self).reset()
        self.map_memory.reset()
        self.child_transformer.reset()
        self.seq = 0
        self.last_observation = None

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        self.child_transformer.cuda(device)
        self.map_memory.cuda(device)
        return self

    def dbg_write_extra(self, map, pose):
        """Write the (optionally globally re-projected) map to the debug GIF writer."""
        if DebugWriter().should_write():
            map = map[0:1, 0:3]
            self.seq += 1
            # Initialize a transformer module
            if pose is not None:
                if self.dbg_t is None:
                    self.dbg_t = MapTransformerBase(self.map_size, self.world_size)
                    if self.is_cuda:
                        self.dbg_t.cuda(self.cuda_device)
                # Transform the prediction to the global frame and write out to disk.
                self.dbg_t.set_map(map, pose)
                map_global, _ = self.dbg_t.get_map(None)
            else:
                map_global = map
            DebugWriter().write_img(map_global[0], "gif_overlaid",
                                    args={"world_size": self.world_size, "name": "identity_integrator"})

    def forward(self, select_images, all_cam_poses, plan_mask=None, show=""):
        """Expand the sparse observations to one map per timestep.

        :param select_images: maps for only the timesteps where plan_mask is True
        :param all_cam_poses: camera poses for ALL timesteps
        :param plan_mask: per-timestep booleans; None means every timestep is observed
        :param show: window name for live visualization; empty string disables it
            (BUGFIX: default was False, and ``False != ""`` is True, so the debug
            display path used to fire with default arguments)
        :return: (all maps [T x C x H x W], all_cam_poses)
        """
        self.prof.tick(".")
        # During rollout, plan_mask will alternate between [True] and [False]
        if plan_mask is None:
            # Every timestep was observed - nothing to fill in.
            all_images = select_images
            return all_images, all_cam_poses

        full_batch_size = len(all_cam_poses)
        all_maps_out_r = []
        self.prof.tick("maps_to_global")

        # For each timestep, take the latest map that was available, transformed into this timestep.
        # Do only a maximum of one transformation for any map to avoid cascading of errors!
        ptr = 0  # index into the sparse select_images
        for i in range(full_batch_size):
            this_pose = all_cam_poses[i:i+1]
            if plan_mask[i]:
                this_obs = (select_images[ptr:ptr+1], this_pose)
                ptr += 1
                self.last_observation = this_obs
            else:
                assert self.last_observation is not None, "The first observation in a sequence needs to be used!"
                last_map, last_pose = self.last_observation
                # TODO: See if we can speed this up. Perhaps batch for all timesteps inbetween observations
                self.child_transformer.set_map(last_map, last_pose)
                this_obs = self.child_transformer.get_map(this_pose)

            all_maps_out_r.append(this_obs[0])
            if show != "":
                # BUGFIX: this_obs is a (map, pose) tuple in both branches; the map
                # tensor is element 0 (the old code did this_obs.data -> AttributeError).
                Presenter().show_image(this_obs[0].data[0, 0:3], show, torch=True, scale=8, waitkey=50)

        self.prof.tick("integrate")

        # Step 3: Convert all maps to local frame
        all_maps_r = torch.cat(all_maps_out_r, dim=0)

        # Write gifs for debugging
        #self.dbg_write_extra(all_maps_r, None)

        self.set_maps(all_maps_r, all_cam_poses)

        self.prof.tick("maps_to_local")
        self.prof.loop()
        self.prof.print_stats(10)

        return all_maps_r, all_cam_poses
class PathPredictor(MapTransformerBase):
    """Predicts a path/goal mask over a map using a LingUNet conditioned on a sentence embedding."""
    # TODO: Standardize run_params

    def __init__(self, lingunet_params, source_map_size, world_size_px, world_size_m):
        super(PathPredictor, self).__init__(source_map_size, world_size_px, world_size_m)
        # Choose the small or full LingUNet variant based on config.
        if lingunet_params.get("small_network"):
            self.unet = Lingunet5S(lingunet_params)
        else:
            self.unet = Lingunet5(lingunet_params)
        #self.map_filter = MapLangSemanticFilter(emb_size, feature_channels, 3)
        self.map_size_px = source_map_size
        self.world_size_px = world_size_px
        self.world_size_m = world_size_m
        self.dbg_t = None
        self.seq = 0
        # NOTE(review): forward() reads self.feature_channels, but it is never
        # assigned here or anywhere visible in this class - calling forward()
        # with a wide-enough image would raise AttributeError. Presumably it
        # should come from lingunet_params; confirm and set it explicitly.

    def init_weights(self):
        self.unet.init_weights()

    def reset(self):
        super(PathPredictor, self).reset()
        self.seq = 0

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        #self.map_filter.cuda(device)
        # Drop the lazily-created debug transformer so it is re-created on the right device.
        self.dbg_t = None
        return self

    def dbg_write_extra(self, map, pose):
        """Re-project the prediction to the global frame and write it to the debug GIF writer."""
        if DebugWriter().should_write():
            self.seq += 1
            # Initialize a transformer module
            if self.dbg_t is None:
                self.dbg_t = MapTransformerBase(
                    self.map_size_px, self.world_size_px, self.world_size_m).to(map.device)
            # Transform the prediction to the global frame and write out to disk.
            self.dbg_t.set_map(map, pose)
            map_global, _ = self.dbg_t.get_map(None)
            DebugWriter().write_img(map_global[0], "gif_overlaid", args={
                "world_size": self.world_size_px,
                "name": "pathpred"
            })

    def forward(self, image, sentence_embeddimg, map_poses, proc_mask=None, show=""):
        """Run the LingUNet on the (possibly channel-truncated) image.

        :param image: input map features [B x C x H x W]
        :param sentence_embeddimg: sentence embedding (parameter name typo kept
            for caller compatibility - callers may pass it by keyword)
        :param map_poses: poses associated with each map; returned unchanged
        :param proc_mask: unused (the masking logic below is commented out)
        :param show: unused in the active code path
        :return: (predicted mask, map_poses)
        """
        # TODO: Move map perturb data augmentation in here.
        # NOTE(review): self.feature_channels is never set in __init__ - see note there.
        if image.size(1) > self.feature_channels:
            image = image[:, 0:self.feature_channels, :, :]
        pred_mask = self.unet(image, sentence_embeddimg)
        # Wtf is this:
        #self.map_filter.precompute_conv_weights(sentence_embeddimg)
        #features_filtered = self.map_filter(image)
        #out_maps = torch.cat([pred_mask, features_filtered], dim=1)
        # Dead code below, disabled via a bare string literal (kept as-is):
        """
        if proc_mask is not None:
            bs = pred_mask.size(0)
            for i in range(bs):
                # If we are using this processed map, apply it
                if proc_mask[bs]:
                    self.set_map(pred_mask[i:i+1], map_poses[i:i+1])
                # Otherwise return the latest processed map, rotated in this frame of reference
                pred_mask[i] = self.get_map(map_poses[i:i+1])
        if show != "":
            Presenter().show_image(pred_mask.data[0], show, torch=True, scale=8, waitkey=1)
        self.set_maps(pred_mask, map_poses)
        #self.dbg_write_extra(pred_mask, map_poses)
        """
        return pred_mask, map_poses
class LeakyIntegratorMap(MapTransformerBase):
    """Integrates per-timestep egocentric maps into a persistent global map.

    Each new observation is blended into the remembered global map with a
    leaky-integrator rule weighted by the observation's coverage mask, then the
    accumulated map is re-projected back into each timestep's camera frame.
    """

    def __init__(self, source_map_size, world_size_px, world_size_m, lamda=0.2):
        super(LeakyIntegratorMap, self).__init__(source_map_size, world_size_px, world_size_m)
        self.map_size = source_map_size
        self.world_size_px = world_size_px
        self.world_size_m = world_size_m
        # Used to move maps between egocentric and global frames.
        self.child_transformer = MapTransformerBase(source_map_size, world_size_px, world_size_m)
        # Leak rate of the integrator (TODO in forward(): output lamda from the model).
        self.lamda = lamda
        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        # Holds the accumulated global-frame map across timesteps.
        self.map_memory = MapTransformerBase(source_map_size, world_size_px, world_size_m)
        self.dbg_t = None
        self.seq = 0

    def init_weights(self):
        pass

    def reset(self):
        super(LeakyIntegratorMap, self).reset()
        self.map_memory.reset()
        self.child_transformer.reset()
        self.seq = 0

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        self.child_transformer.cuda(device)
        self.map_memory.cuda(device)
        return self

    def dbg_write_extra(self, map, pose):
        """Write the (optionally globally re-projected) map to the debug GIF writer."""
        if DebugWriter().should_write():
            map = map[0:1, 0:3]
            self.seq += 1
            # Initialize a transformer module
            if pose is not None:
                if self.dbg_t is None:
                    self.dbg_t = MapTransformerBase(
                        self.map_size, self.world_size_px, self.world_size_m).to(map.device)
                # Transform the prediction to the global frame and write out to disk.
                self.dbg_t.set_map(map, pose)
                map_global, _ = self.dbg_t.get_map(None)
            else:
                map_global = map
            DebugWriter().write_img(map_global[0], "gif_overlaid", args={
                "world_size": self.world_size_px,
                "name": "sm"
            })

    def forward(self, images, coverages, cam_poses, add_mask=None, show=""):
        """Integrate a sequence of maps and return them in each camera's frame.

        :param images: egocentric feature maps, one per timestep
        :param coverages: per-timestep coverage masks (which map cells were observed)
        :param cam_poses: camera pose for each timestep
        :param add_mask: per-timestep booleans; False skips integrating that observation
        :param show: window name for live visualization; empty string disables it
            (BUGFIX: default was False, and ``False != ""`` is True, so the debug
            display path used to fire with default arguments)
        :return: (integrated maps in camera frames, cam_poses)
        """
        self.prof.tick(".")
        batch_size = len(images)

        assert add_mask is None or add_mask[0] is not None, \
            "The first observation in a sequence needs to be used!"

        # Step 1: All local maps to global:
        # TODO: Allow inputing global maps when new projector is ready
        self.child_transformer.set_maps(images, cam_poses)
        observations_g, _ = self.child_transformer.get_maps(None)
        self.child_transformer.set_maps(coverages, cam_poses)
        coverages_g, _ = self.child_transformer.get_maps(None)

        # Pre-scale every observation by its coverage and the leak rate.
        masked_observations_g_add = self.lamda * observations_g * coverages_g

        all_maps_out_g = []

        self.prof.tick("maps_to_global")

        # TODO: Draw past trajectory on an extra channel of the semantic map
        # Step 2: Integrate serially in the global frame
        for i in range(batch_size):
            # If we don't have a map yet, initialize the map to this observation
            if self.map_memory.latest_maps is None:
                self.map_memory.set_map(observations_g[i:i + 1], None)
                #self.set_map(observations_g[i:i+1], None)

            # Allow masking of observations
            if add_mask is None or add_mask[i]:
                # Get the current global-frame map
                map_g, _ = self.map_memory.get_map(None)
                #obs_g = observations_g[i:i+1]
                cov_g = coverages_g[i:i + 1]
                obs_cov_g = masked_observations_g_add[i:i + 1]

                # Add the observation into the map using a leaky integrator rule
                # (TODO: Output lamda from model)
                new_map_g = (1 - self.lamda) * map_g + obs_cov_g + self.lamda * map_g * (1 - cov_g)

                # Remember this new map
                self.map_memory.set_map(new_map_g, None)
                #self.set_map(new_map_g, None)

            map_g, _ = self.map_memory.get_map(None)

            # Return this map in the camera frame of reference
            #map_r, _ = self.get_map(cam_poses[i:i+1])

            if show != "":
                Presenter().show_image(map_g.data[0, 0:3], show, torch=True, scale=8, waitkey=50)

            all_maps_out_g.append(map_g)

        self.prof.tick("integrate")

        # Step 3: Convert all maps to local frame
        all_maps_g = torch.cat(all_maps_out_g, dim=0)

        # Write gifs for debugging
        self.dbg_write_extra(all_maps_g, None)

        self.child_transformer.set_maps(all_maps_g, None)
        maps_r, _ = self.child_transformer.get_maps(cam_poses)
        self.set_maps(maps_r, cam_poses)

        self.prof.tick("maps_to_local")
        self.prof.loop()
        self.prof.print_stats(10)

        return maps_r, cam_poses
class IdentityMapAccumulator(MapTransformerBase):
    """
    This map accumulator rule simply keeps the latest observation and discards the rest
    """
    def __init__(self, source_map_size, world_size_px, world_size_m):
        super(IdentityMapAccumulator, self).__init__(source_map_size, world_size_px, world_size_m)
        # Used to move each observation into the global (map) frame before storing it.
        self.child_transformer = MapTransformerBase(source_map_size, world_size_px, world_size_m)

    def reset(self):
        super(IdentityMapAccumulator, self).reset()
        self.child_transformer.reset()

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        self.child_transformer.cuda(device)
        return self

    def init_weights(self):
        pass

    def forward(self, current_maps, coverages, cam_poses, add_mask=None, show=""):
        """Return one map per timestep: the newest observation, or the remembered
        one re-projected into the current camera frame when add_mask[i] is False.

        :param current_maps: per-timestep egocentric maps
        :param coverages: unused here (kept for interface parity with other accumulators)
        :param cam_poses: camera pose for each timestep
        :param add_mask: per-timestep booleans; None means use every observation
        :param show: window name for live visualization; empty string disables it
        :return: (maps in camera frames, cam_poses)
        """
        batch_size = len(cam_poses)
        assert add_mask is None or add_mask[
            0] is not None, "The first observation in a sequence needs to be used!"

        # If we don't have masked observations, just return each timestep observations
        if add_mask is None:
            self.set_maps(current_maps, cam_poses)
            return current_maps, cam_poses

        maps_r = []
        # If we have masked observations, then for timesteps where observation is masked (False),
        # get the previous observation rotated to the current frame
        for i in range(batch_size):
            # If we don't have a map yet, rotate this observation and initialize a map
            # NOTE(review): this reads self.latest_map (singular), while other classes in
            # this file read MapTransformerBase.latest_maps (plural) - confirm the base
            # class actually defines latest_map, otherwise this raises AttributeError.
            if self.latest_map is None:
                self.set_map(current_maps[i:i + 1], cam_poses[i:i + 1])
                # Store the observation in the global frame (pose None) as the initial map.
                map_g, _ = self.get_map(None)
                self.set_map(map_g, None)

            # Allow masking of observations
            if add_mask is None or add_mask[i]:
                # Transform the observation into the global (map) frame
                self.child_transformer.set_map(current_maps[i:i + 1], cam_poses[i:i + 1])
                obs_g, _ = self.child_transformer.get_map(None)
                # Remember this new map
                self.set_map(obs_g, None)

            # Return this map in the camera frame of reference
            map_r, _ = self.get_map(cam_poses[i:i + 1])
            if show != "":
                Presenter().show_image(map_r.data[0, 0:3], show, torch=True, scale=8, waitkey=1)
            maps_r.append(map_r)

        maps_r = torch.cat(maps_r, dim=0)
        self.set_maps(maps_r, cam_poses)
        return maps_r, cam_poses
class IdentityIntegratorMap(MapTransformerBase):
    """Map "integrator" that simply keeps the most recent observation.

    At timesteps with no new observation (add_mask[i] is False), the last
    observation is re-projected into the current camera frame. At most one
    re-projection is ever applied to a map to avoid cascading errors.
    """

    def __init__(self, source_map_size, world_size_px, world_size_m):
        super(IdentityIntegratorMap, self).__init__(source_map_size, world_size_px, world_size_m)
        self.map_size = source_map_size
        self.world_size = world_size_px
        self.world_size_m = world_size_m
        # Used to re-project the last observation into the current frame.
        self.child_transformer = MapTransformerBase(source_map_size, world_size_px, world_size_m)
        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = MapTransformerBase(source_map_size, world_size_px, world_size_m)
        # Latest (map, pose) tuple seen so far in the current sequence.
        self.last_observation = None
        self.dbg_t = None
        self.seq = 0

    def init_weights(self):
        pass

    def reset(self):
        super(IdentityIntegratorMap, self).reset()
        self.map_memory.reset()
        self.child_transformer.reset()
        self.seq = 0
        self.last_observation = None

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        self.child_transformer.cuda(device)
        self.map_memory.cuda(device)
        return self

    def dbg_write_extra(self, map, pose):
        """Write the (optionally globally re-projected) map to the debug GIF writer."""
        if DebugWriter().should_write():
            map = map[0:1, 0:3]
            self.seq += 1
            # Initialize a transformer module
            if pose is not None:
                if self.dbg_t is None:
                    self.dbg_t = MapTransformerBase(self.map_size, self.world_size,
                                                    self.world_size_m).to(map.device)
                # Transform the prediction to the global frame and write out to disk.
                self.dbg_t.set_map(map, pose)
                map_global, _ = self.dbg_t.get_map(None)
            else:
                map_global = map
            DebugWriter().write_img(map_global[0], "gif_overlaid",
                                    args={"world_size": self.world_size, "name": "identity_integrator"})

    def forward(self, images, cam_poses, add_mask=None, show=""):
        """Return one map per timestep, carrying the latest observation forward.

        :param images: per-timestep egocentric maps
        :param cam_poses: camera pose for each timestep
        :param add_mask: per-timestep booleans; None means use every observation
        :param show: window name for live visualization; empty string disables it
            (BUGFIX: default was False, and ``False != ""`` is True, so the debug
            display path used to fire with default arguments)
        :return: (maps in camera frames, cam_poses)
        """
        self.prof.tick(".")
        batch_size = len(cam_poses)

        assert add_mask is None or add_mask[0] is not None, \
            "The first observation in a sequence needs to be used!"

        all_maps_out_r = []

        self.prof.tick("maps_to_global")

        # For each timestep, take the latest map that was available, transformed into this timestep
        # Do only a maximum of one transformation for any map to avoid cascading of errors!
        for i in range(batch_size):
            if add_mask is None or add_mask[i]:
                this_obs = (images[i:i+1], cam_poses[i:i+1])
                self.last_observation = this_obs
            else:
                last_obs = self.last_observation
                assert last_obs is not None, "The first observation in a sequence needs to be used!"
                self.child_transformer.set_map(last_obs[0], last_obs[1])
                this_obs = self.child_transformer.get_map(cam_poses[i:i+1])

            all_maps_out_r.append(this_obs[0])

            if show != "":
                # BUGFIX: this_obs is a (map, pose) tuple in both branches; the map
                # tensor is element 0 (the old code did this_obs.data -> AttributeError).
                Presenter().show_image(this_obs[0].data[0, 0:3], show, torch=True, scale=8, waitkey=50)

        self.prof.tick("integrate")

        # Step 3: Convert all maps to local frame
        all_maps_r = torch.cat(all_maps_out_r, dim=0)

        # Write gifs for debugging
        self.dbg_write_extra(all_maps_r, None)

        self.set_maps(all_maps_r, cam_poses)

        self.prof.tick("maps_to_local")
        self.prof.loop()
        self.prof.print_stats(10)

        return all_maps_r, cam_poses

    def forward_deprecated(self, images, cam_poses, add_mask=None, show=""):
        """Older implementation that integrates in the global frame, then converts back.

        Kept for reference; ``forward`` above supersedes it.
        (Same ``show`` default fix as ``forward``.)
        """
        self.prof.tick(".")
        batch_size = len(cam_poses)

        assert add_mask is None or add_mask[0] is not None, \
            "The first observation in a sequence needs to be used!"

        # Step 1: All local maps to global:
        # TODO: Allow inputing global maps when new projector is ready
        self.child_transformer.set_maps(images, cam_poses)
        observations_g, _ = self.child_transformer.get_maps(None)

        all_maps_out_g = []

        self.prof.tick("maps_to_global")

        # TODO: Draw past trajectory on an extra channel of the semantic map
        # Step 2: Integrate serially in the global frame
        for i in range(batch_size):
            # If we don't have a map yet, initialize the map to this observation
            if self.map_memory.latest_maps is None:
                self.map_memory.set_map(observations_g[i:i+1], None)

            # Allow masking of observations
            if add_mask is None or add_mask[i]:
                # Use the map from this frame
                map_g = observations_g[i:i+1]
                self.map_memory.set_map(map_g, None)
            else:
                # Use the latest available map oriented in global frame
                map_g, _ = self.map_memory.get_map(None)

            if show != "":
                Presenter().show_image(map_g.data[0, 0:3], show, torch=True, scale=8, waitkey=50)

            all_maps_out_g.append(map_g)

        self.prof.tick("integrate")

        # Step 3: Convert all maps to local frame
        all_maps_g = torch.cat(all_maps_out_g, dim=0)

        # Write gifs for debugging
        self.dbg_write_extra(all_maps_g, None)

        self.child_transformer.set_maps(all_maps_g, None)
        maps_r, _ = self.child_transformer.get_maps(cam_poses)
        self.set_maps(maps_r, cam_poses)

        self.prof.tick("maps_to_local")
        self.prof.loop()
        self.prof.print_stats(10)

        return maps_r, cam_poses
class LeakyIntegratorGlobalMap(MapTransformerBase):
    """Leaky-integrator map accumulator operating entirely in the global frame.

    Unlike LeakyIntegratorMap, inputs are already world-frame maps, the memory
    is a plain list of past maps, and no frame conversion is performed.
    """

    def __init__(self, source_map_size, world_in_map_size, lamda=0.2):
        super(LeakyIntegratorGlobalMap, self).__init__(source_map_size, world_in_map_size)
        self.map_size = source_map_size
        self.world_size = world_in_map_size
        self.child_transformer = MapTransformerBase(source_map_size, world_in_map_size)
        # Leak rate of the integrator.
        self.lamda = lamda
        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        # History of integrated world-frame maps; last element is the current map.
        self.map_memory = []
        self.dbg_t = None
        self.seq = 0

    def init_weights(self):
        pass

    def reset(self):
        super(LeakyIntegratorGlobalMap, self).reset()
        self.map_memory = []
        self.child_transformer.reset()
        self.seq = 0

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        self.child_transformer.cuda(device)
        return self

    def dbg_write_extra(self, map, pose):
        """Write the (optionally globally re-projected) map to the debug GIF writer."""
        if DebugWriter().should_write():
            map = map[0:1, 0:3]
            self.seq += 1
            # Initialize a transformer module
            if pose is not None:
                if self.dbg_t is None:
                    self.dbg_t = MapTransformerBase(self.map_size, self.world_size)
                    if self.is_cuda:
                        self.dbg_t.cuda(self.cuda_device)
                # Transform the prediction to the global frame and write out to disk.
                self.dbg_t.set_map(map, pose)
                map_global, _ = self.dbg_t.get_map(None)
            else:
                map_global = map
            DebugWriter().write_img(map_global[0], "gif_overlaid", args={
                "world_size": self.world_size,
                "name": "sm"
            })

    def forward(self, images_w, coverages_w, add_mask=None, reset_mask=None, show=""):
        """Integrate a sequence of world-frame maps with a leaky-integrator rule.

        :param images_w: world-frame feature maps, one per timestep
        :param coverages_w: per-timestep world-frame coverage masks
        :param add_mask: per-timestep booleans; False carries the previous map forward
        :param reset_mask: per-timestep booleans; True restarts the memory at that timestep
        :param show: window name for live visualization; empty string disables it
            (BUGFIX: default was False, and ``False != ""`` is True, so the debug
            display path used to fire with default arguments)
        :return: integrated world-frame maps [T x C x H x W]
        """
        self.prof.tick(".")
        batch_size = len(images_w)

        assert add_mask is None or add_mask[0] is not None, \
            "The first observation in a sequence needs to be used!"

        # Pre-scale every observation by its coverage and the leak rate.
        masked_observations_w_add = self.lamda * images_w * coverages_w

        all_maps_out_w = []

        self.prof.tick("maps_to_global")

        # TODO: Draw past trajectory on an extra channel of the semantic map
        # Step 2: Integrate serially in the global frame
        for i in range(batch_size):
            # First observation, or an explicit reset: start over from this observation.
            if len(self.map_memory) == 0 or (reset_mask is not None and reset_mask[i]):
                new_map_w = images_w[i:i + 1]
            # Allow masking of observations
            elif add_mask is None or add_mask[i]:
                # Get the current global-frame map
                map_g = self.map_memory[-1]
                cov_w = coverages_w[i:i + 1]
                obs_cov_g = masked_observations_w_add[i:i + 1]

                # Add the observation into the map using a leaky integrator rule
                # (TODO: Output lamda from model)
                new_map_w = (1 - self.lamda) * map_g + obs_cov_g + self.lamda * map_g * (1 - cov_w)
            else:
                # Masked-out observation: carry the previous map forward unchanged.
                new_map_w = self.map_memory[-1]

            self.map_memory.append(new_map_w)
            all_maps_out_w.append(new_map_w)

            if show != "":
                Presenter().show_image(new_map_w.data[0, 0:3], show, torch=True, scale=8, waitkey=50)

        self.prof.tick("integrate")

        # Step 3: Convert all maps to local frame
        all_maps_w = torch.cat(all_maps_out_w, dim=0)

        # Write gifs for debugging
        #self.dbg_write_extra(all_maps_w, None)

        self.prof.tick("maps_to_local")
        self.prof.loop()
        self.prof.print_stats(10)

        return all_maps_w
class RatioPathPredictor(MapTransformerBase):
    """Path predictor that computes a language-conditioned posterior distribution and,
    optionally, a language-agnostic prior, over map locations."""
    # TODO: Standardize run_params

    def __init__(self,
                 prior_channels=32,
                 posterior_channels=32,
                 pred_channels=2,
                 emb_size=120,
                 source_map_size=32,
                 world_size=32,
                 compute_prior=True,
                 use_prior=False,
                 l2=False):
        super(RatioPathPredictor, self).__init__(source_map_size, world_size)

        self.prior_img_channels = prior_channels
        self.posterior_img_channels = posterior_channels
        self.emb_size = emb_size
        # If True, skip the spatial softmax (raw network outputs are used, e.g. for L2 losses).
        self.l2 = l2
        self.use_prior = use_prior

        if use_prior:
            assert compute_prior, "If we want to use the prior distribution, we should compute it, right?"

        # Posterior network sees the full image and the sentence embedding.
        self.unet_posterior = Unet5ContextualBneck(
            posterior_channels, pred_channels, emb_size, hc1=48, hb1=24, hc2=128)
        # Prior network sees only the non-language channels and a dummy 1-d embedding.
        self.unet_prior = Unet5ContextualBneck(
            prior_channels, pred_channels, 1, hc1=48, hb1=24, hc2=128)

        self.softmax = SpatialSoftmax2d()
        self.norm = nn.InstanceNorm2d(2)  # NOTE(review): never used in forward()
        self.compute_prior = compute_prior

        #self.map_filter = MapLangSemanticFilter(emb_size, feature_channels, 3)
        self.map_size = source_map_size
        self.world_size = world_size
        self.dbg_t = None
        self.seq = 0

    def init_weights(self):
        self.unet_posterior.init_weights()
        self.unet_prior.init_weights()

    def reset(self):
        super(RatioPathPredictor, self).reset()
        self.seq = 0

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        #self.map_filter.cuda(device)
        self.softmax.cuda(device)
        # Drop the lazily-created debug transformer so it is re-created on the right device.
        self.dbg_t = None
        return self

    def dbg_write_extra(self, map, pose):
        """Re-project the prediction to the global frame and write it to the debug GIF writer."""
        if DebugWriter().should_write():
            self.seq += 1
            # Initialize a transformer module
            if self.dbg_t is None:
                self.dbg_t = MapTransformerBase(self.map_size, self.world_size)
                if self.is_cuda:
                    self.dbg_t.cuda(self.cuda_device)
            # Transform the prediction to the global frame and write out to disk.
            self.dbg_t.set_map(map, pose)
            map_global, _ = self.dbg_t.get_map(None)
            DebugWriter().write_img(map_global[0], "gif_overlaid",
                                    args={"world_size": self.world_size, "name": "pathpred"})

    def forward(self, image, sentence_embeddimg, map_poses, value_store=None, show=""):
        """Predict the posterior (and optionally prior) path distribution.

        :param image: input map features [B x C x H x W]
        :param sentence_embeddimg: sentence embedding (parameter name typo kept
            for caller compatibility)
        :param map_poses: poses for each map; returned unchanged
        :param value_store: unused in this implementation
        :param show: unused (the visualization code below is commented out)
        :return: (ret, pred_mask_prior, pred_mask_posterior, map_poses) where ret
            is the prior probability if use_prior else the posterior probability
        """
        # TODO: Move map perturb data augmentation in here.
        if image.size(1) > self.posterior_img_channels:
            image = image[:, 0:self.posterior_img_channels, :, :]

        # channel 0 is start position
        # channels 1-3 are the grounded map
        # all other channels are the semantic map

        # Dummy 1-d "sentence embedding" so the prior network stays language-agnostic.
        # NOTE(review): Variable is deprecated in modern PyTorch; tensors work directly.
        fake_embedding = Variable(empty_float_tensor([image.size(0), 1], self.is_cuda, self.cuda_device))

        # The first N channels would've been computed by grounding map processor first. Remove them so that the
        # prior is clean from any language
        pred_mask_posterior = self.unet_posterior(image, sentence_embeddimg)
        if not self.l2:
            pred_mask_posterior_prob = self.softmax(pred_mask_posterior)
        else:
            pred_mask_posterior_prob = pred_mask_posterior

        if self.compute_prior:
            lang_conditioned_channels = self.posterior_img_channels - self.prior_img_channels
            prior_image = image[:, lang_conditioned_channels:]
            pred_mask_prior = self.unet_prior(prior_image, fake_embedding)
            if not self.l2:
                pred_mask_prior_prob = self.softmax(pred_mask_prior)
            else:
                pred_mask_prior_prob = pred_mask_prior
            # Posterior-to-prior ratio, re-normalized; small epsilon avoids division by zero.
            # NOTE(review): ratio_mask is computed but never returned or stored.
            ratio_mask = pred_mask_posterior_prob / (pred_mask_prior_prob + 1e-3)
            ratio_mask = self.softmax(ratio_mask)
        else:
            # NOTE(review): pred_mask_prior_prob is undefined on this branch; the
            # use_prior assert in __init__ is what keeps the return below safe.
            pred_mask_prior = pred_mask_posterior

        #if show != "":
        #    Presenter().show_image(ratio_mask.data[i], show, torch=True, scale=8, waitkey=1)

        self.set_maps(pred_mask_posterior_prob, map_poses)

        ret = pred_mask_posterior_prob
        if self.use_prior:
            ret = pred_mask_prior_prob

        return ret, pred_mask_prior, pred_mask_posterior, map_poses