def uncompress_zar(fn_src, fn_dst):
    """Copy every array of the dataset at ``fn_src`` into a new dataset at ``fn_dst``.

    The current ``zarr.storage.default_compressor`` is printed and then replaced
    with ``None`` before the destination dataset is initialised (presumably so
    that the copy is stored uncompressed -- confirm against the zarr version in
    use). The module-level default is NOT restored when the function returns.

    Args:
        fn_src: location of the dataset to read from.
        fn_dst: location of the dataset to create and fill.
    """
    # number of agent records moved per append call
    agents_per_batch = 1024 * 1024

    print(fn_src)
    print(fn_dst)
    print(zarr.storage.default_compressor)
    zarr.storage.default_compressor = None

    source = ChunkedDataset(fn_src).open(cached=False)
    target = ChunkedDataset(fn_dst)
    # initialize() is called with its defaults; no explicit array sizes are passed
    target.initialize()

    with utils.timeit_context("copy scenes"):
        target.scenes.append(source.scenes[:])

    with utils.timeit_context("copy frames"):
        target.frames.append(source.frames[:])

    with utils.timeit_context("copy agents"):
        # agents are transferred in fixed-size batches instead of one big slice
        total_agents = len(source.agents)
        for start in tqdm(range(0, total_agents, agents_per_batch)):
            stop = start + agents_per_batch
            target.agents.append(source.agents[start:stop])

    with utils.timeit_context("copy tl_faces"):
        target.tl_faces.append(source.tl_faces[:])
def zarr_scenes_chop_lite(input_zarr: str, output_zarr: str, num_frames_to_copy: int, history_num_frames: int) -> list:
    """
    Copy a short window of frames from each scene in input_zarr and paste it into output_zarr.

    For every scene the window is
    ``[num_frames_to_copy - history_num_frames : num_frames_to_copy]``, counted from the
    scene's first frame. The agents and traffic light faces referenced by those frames are
    copied too, and all index intervals are rewritten so they are relative to the output
    dataset.

    A scene is skipped (and an ``Excluded : <idx> : <frame count>`` line is printed) when the
    difference between the two entries of its frame interval is smaller than
    ``num_frames_to_copy``, or when ``num_frames_to_copy < history_num_frames``.

    NOTE(review): an empty window (``history_num_frames`` < 1) would make the
    ``frames[[0, -1]]`` lookup below fail for the first scene that is not skipped --
    callers appear to be expected to pass at least 1; confirm.

    Args:
        input_zarr (str): path to the input zarr
        output_zarr (str): path to the output zarr
        num_frames_to_copy (int): offset from the start of each scene at which the copied window ends
        history_num_frames (int): how many frames to include as a history from the point `num_frames_to_copy`

    Returns:
        list[int]: indices (into the input scenes) of the scenes that were copied
    """
    input_dataset = ChunkedDataset(input_zarr)
    input_dataset.open()

    output_dataset = ChunkedDataset(output_zarr)
    output_dataset.initialize()

    # current indices where to copy in the output_dataset
    cur_frame_idx, cur_agent_idx, cur_tl_face_idx = 0, 0, 0
    chopped_indices = []

    for idx in tqdm(range(len(input_dataset.scenes)), desc="copying"):
        scene = input_dataset.scenes[idx]
        first_frame_idx = scene["frame_index_interval"][0]
        last_frame_idx = scene["frame_index_interval"][-1]
        scene_num_frames = last_frame_idx - first_frame_idx

        # skip scenes that are too short to contain the requested window
        if scene_num_frames < num_frames_to_copy or num_frames_to_copy < history_num_frames:
            print(' : '.join(('Excluded', str(idx), str(scene_num_frames))))
            continue

        # get data and immediately chop frames, agents and traffic lights
        window_end = first_frame_idx + num_frames_to_copy
        frames = input_dataset.frames[window_end - history_num_frames: window_end]
        agents = input_dataset.agents[get_agents_slice_from_frames(*frames[[0, -1]])]
        tl_faces = input_dataset.tl_faces[get_tl_faces_slice_from_frames(*frames[[0, -1]])]

        # reset the scene interval relative to our output; len(frames) is used so the
        # interval matches the number of frames actually copied
        scene["frame_index_interval"][0] = cur_frame_idx
        scene["frame_index_interval"][1] = cur_frame_idx + len(frames)

        # shift the per-frame intervals so the first copied frame starts at the current
        # write position of the output agents / traffic light faces arrays
        frames["agent_index_interval"] += cur_agent_idx - frames[0]["agent_index_interval"][0]
        frames["traffic_light_faces_index_interval"] += (
            cur_tl_face_idx - frames[0]["traffic_light_faces_index_interval"][0]
        )

        # write in dest using append (slow)
        output_dataset.scenes.append(scene[None, ...])  # add a leading axis so the single scene can be appended
        output_dataset.frames.append(frames)
        output_dataset.agents.append(agents)
        output_dataset.tl_faces.append(tl_faces)

        # increase indices in output
        cur_frame_idx += len(frames)
        cur_agent_idx += len(agents)
        cur_tl_face_idx += len(tl_faces)

        # remember which input scenes made it into the output
        chopped_indices.append(idx)

    return chopped_indices