def __get_batches(self):
    """Group cells by z plane and split each group into fixed-size batches.

    Populates:
        self.cells_groups: mapping of centre plane -> list of cells
            (as returned by ``group_cells_by_z``).
        self.batches: list of cell lists, each at most ``self.batch_size``
            long; cells from different planes are never mixed in a batch.
        self.ordered_cells: all cells flattened in batch order.
    """
    self.cells_groups = group_cells_by_z(self.cells)
    # TODO: add optional shuffling of each group here
    self.batches = []
    # Iterate the groups directly rather than looking each key up again.
    for cells_per_plane in self.cells_groups.values():
        for start in range(0, len(cells_per_plane), self.batch_size):
            self.batches.append(
                cells_per_plane[start : start + self.batch_size]
            )
    # Flatten the batches back into a single ordered list of cells.
    self.ordered_cells = [cell for batch in self.batches for cell in batch]
def main(
    cells,
    cubes_output_dir,
    planes_paths,
    cube_depth,
    cube_width,
    cube_height,
    voxel_sizes,
    network_voxel_sizes,
    max_ram,
    n_free_cpus=4,
    save_empty_cubes=False,
):
    """Extract cubes around the given cell positions and save them to disk.

    Planes are read from ``planes_paths`` (mapping channel -> list of image
    paths), the set of z planes needed to cover every cell's cube is
    computed, and cube extraction is fanned out across processes via
    ``save_cubes``.

    Raises:
        StackSizeError: if fewer planes are provided than one cube needs.
        ValueError: if no planes end up selected for reading.
    """
    start_time = datetime.now()

    # Rescale the cube depth when the data's z voxel size differs from the
    # voxel size the network expects.
    if voxel_sizes[0] != network_voxel_sizes[0]:
        plane_scaling_factor = float(network_voxel_sizes[0]) / float(
            voxel_sizes[0]
        )
        num_planes_needed_for_cube = round(cube_depth * plane_scaling_factor)
    else:
        num_planes_needed_for_cube = cube_depth

    if num_planes_needed_for_cube > len(planes_paths[0]):
        raise StackSizeError(
            "The number of planes provided is not sufficient "
            "for any cubes to be extracted. Please check the "
            "input data"
        )

    first_plane = tifffile.imread(list(planes_paths.values())[0][0])
    planes_shape = first_plane.shape
    brain_depth = len(list(planes_paths.values())[0])

    # TODO: use to assert all centre planes processed
    center_planes = sorted({cell.z for cell in cells})

    # REFACTOR: rename (clashes with different meaning of planes_to_read below)
    # NOTE: np.bool was removed in NumPy 1.24; the builtin bool is correct.
    planes_to_read = np.zeros(brain_depth, dtype=bool)
    half_nz = num_planes_needed_for_cube // 2
    # Clamp the slice start to 0: a centre plane near the top of the stack
    # would otherwise give a negative start, which wraps to the end of the
    # array and marks the wrong planes.
    if is_even(num_planes_needed_for_cube):
        # WARNING: not centered because even
        for p in center_planes:
            planes_to_read[max(0, p - half_nz) : p + half_nz] = 1
    else:
        # centered
        for p in center_planes:
            planes_to_read[max(0, p - half_nz) : p + half_nz + 1] = 1

    planes_to_read = np.where(planes_to_read)[0]

    if not planes_to_read.size:
        # Build the message once so the log and the exception always agree.
        error_msg = (
            f"No planes found, you need at the very least "
            f"{num_planes_needed_for_cube} "
            f"planes to proceed (i.e. cube z size). "
            f"Brain z dimension is {brain_depth}."
        )
        logging.error(error_msg, stack_info=True)
        raise ValueError(error_msg)

    # TODO: check if needs to flip args.cube_width and args.cube_height
    cells_groups = group_cells_by_z(cells)

    # copies=2 is set because at all times there is a plane queue (deque)
    # and an array passed to `Cube`
    ram_per_process = get_ram_requirement_per_process(
        planes_paths[0][0],
        num_planes_needed_for_cube,
        copies=2,
    )
    n_processes = get_num_processes(
        min_free_cpu_cores=n_free_cpus,
        ram_needed_per_process=ram_per_process,
        n_max_processes=len(planes_to_read),
        fraction_free_ram=0.2,
        max_ram_usage=system.memory_in_bytes(max_ram, "GB"),
    )
    # TODO: don't need to extract cubes from all channels if
    # n_signal_channels>1
    with ProcessPoolExecutor(max_workers=n_processes) as executor:
        n_planes_per_chunk = len(planes_to_read) // n_processes
        for i in range(n_processes):
            start_idx = i * n_planes_per_chunk
            # Chunks overlap by a cube depth so every cube near a chunk
            # boundary still has all the planes it needs.
            end_idx = (
                start_idx + n_planes_per_chunk + num_planes_needed_for_cube - 1
            )
            # NOTE(review): this compares an index into planes_to_read with a
            # plane *value* (planes_to_read[-1]) — looks suspect; verify the
            # intended bound is len(planes_to_read). Behavior kept as-is.
            if end_idx > planes_to_read[-1]:
                end_idx = None
            sub_planes_to_read = planes_to_read[start_idx:end_idx]
            executor.submit(
                save_cubes,
                cells_groups,
                planes_paths,
                sub_planes_to_read,
                planes_shape,
                voxel_sizes,
                network_voxel_sizes,
                num_planes_for_cube=num_planes_needed_for_cube,
                cube_width=cube_width,
                cube_height=cube_height,
                cube_depth=cube_depth,
                thread_id=i,
                output_dir=cubes_output_dir,
                save_empty_cubes=save_empty_cubes,
            )

    total_cubes = system.get_number_of_files_in_dir(cubes_output_dir)
    time_taken = datetime.now() - start_time
    logging.info(
        "All cubes ({}) extracted in: {}".format(total_cubes, time_taken)
    )
def test_group_cells_by_z():
    """Cells loaded from the XML fixture group by z plane with the
    expected plane set and per-plane counts."""
    expected_planes = [
        1272,
        1273,
        1274,
        1275,
        1276,
        1277,
        1278,
        1279,
        1280,
        1281,
        1282,
        1283,
        1284,
        1285,
        1286,
        1287,
        1288,
        1289,
        1290,
        1291,
        1292,
        1294,
        1295,
        1296,
        1297,
        1298,
    ]
    expected_group_sizes = [
        1,
        3,
        7,
        8,
        3,
        1,
        4,
        3,
        1,
        2,
        2,
        1,
        1,
        2,
        5,
        2,
        2,
        2,
        3,
        1,
        1,
        6,
        1,
        1,
        1,
        1,
    ]
    grouped = cells.group_cells_by_z(get_cells(xml_path))
    planes = sorted(grouped)
    assert expected_planes == planes
    assert expected_group_sizes == [len(grouped[plane]) for plane in planes]