def colm_pipeline(
    input_s3_path,
    output_s3_path,
    channel_of_interest,
    autofluorescence_channel,
    raw_data_path,
    stitched_data_path,
    log_s3_path=None,
):
    """Download, stitch, bias correct and upload one channel of COLM data.

    Args:
        input_s3_path: S3 path to raw COLM data. Should be of the form
            s3://<bucket>/<experiment>
        output_s3_path: S3 path to store the precomputed volume. Should be
            of the form s3://<bucket>/<path_to_precomputed>
        channel_of_interest: Channel number to operate on. Should be a
            single integer.
        autofluorescence_channel: Autofluorescence channel number. Not
            referenced in the body of this function.
        raw_data_path: Local path where corrected raw data will be stored.
        stitched_data_path: Local path where stitched slices will be stored.
        log_s3_path: S3 path at which pipeline intermediates can be stored,
            including the bias correction tile and the xml files from
            Terastitcher.
    """
    source_url = S3Url(input_s3_path.strip('/'))
    # Constructed as in the original flow; the result is not used afterwards.
    S3Url(output_s3_path.strip('/'))

    # Fetch the raw tiles of this channel from the VW0 folder.
    download_raw_data(
        f'{source_url.url}/VW0/',
        channel_of_interest,
        raw_data_path,
        log_s3_path=log_s3_path,
    )

    # Alignments are only computed when none could be fetched from the log
    # path, and then only for channel 0.
    fetched_alignments = download_terastitcher_files(log_s3_path, raw_data_path)
    if not fetched_alignments:
        if channel_of_interest == 0:
            run_terastitcher(
                raw_data_path,
                stitched_data_path,
                input_s3_path,
                log_s3_path=log_s3_path,
                compute_only=True,
            )

    # Bias correct every tile; the bias tile is saved to log_s3_path.
    correct_raw_data(raw_data_path, channel_of_interest, log_s3_path=log_s3_path)

    # Stitch the corrected tiles.
    stitch_metadata = run_terastitcher(
        raw_data_path,
        stitched_data_path,
        input_s3_path,
        log_s3_path=log_s3_path,
        stitch_only=True,
    )

    # Downsample and upload the stitched data to S3.
    voxel_size = np.array(stitch_metadata['voxel_size'])
    create_precomputed_volume(stitched_data_path, voxel_size, output_s3_path)

    # Whole brain bias correction. The corrected data overwrites the volume
    # that was just uploaded so the data is not replicated (higher S3 cost).
    correct_stitched_data(output_s3_path, output_s3_path)
def run_terastitcher(
    raw_data_path,
    stitched_data_path,
    input_s3_path,
    log_s3_path=None,
    stitch_only=False,
    compute_only=False,
):
    """Generate and run the Terastitcher commands for one set of raw tiles.

    Args:
        raw_data_path: Local directory holding the raw tiles; the xml files
            uploaded at the end are globbed from this directory.
        stitched_data_path: Local directory passed on to the command
            generator as the output location.
        input_s3_path: S3 path of the raw data; its bucket and key are
            passed to the command generator.
        log_s3_path: Optional S3 path to which the xml files are uploaded.
        stitch_only: Select the STITCH_ONLY steps. Takes precedence over
            compute_only and disables the xml upload.
        compute_only: Select the COMPUTE_ONLY steps.

    Returns:
        The metadata object returned by generate_stitching_commands.
    """
    source_url = S3Url(input_s3_path.strip("/"))

    # Without either flag every step is run.
    steps = (
        STITCH_ONLY
        if stitch_only
        else (COMPUTE_ONLY if compute_only else ALL_STEPS)
    )

    metadata, commands = generate_stitching_commands(
        stitched_data_path,
        raw_data_path,
        source_url.bucket,
        source_url.key,
        steps,
    )

    # Run the generated commands one after another, echoing each one first.
    for command in commands:
        print(command)
        subprocess.run(shlex.split(command))

    # Nothing to upload without a log path or for a stitch-only run.
    if not log_s3_path or stitch_only:
        return metadata

    log_url = S3Url(log_s3_path.strip("/"))
    xml_files = glob(f"{raw_data_path}/*.xml")
    for xml_file in tqdm(xml_files, desc="saving xml files to S3"):
        file_name = xml_file.rsplit("/", 1)[-1]
        upload_file_to_s3(xml_file, log_url.bucket, f"{log_url.key}/{file_name}")
    return metadata
def get_layer_json(s3_layer_path, affine_matrix):
    """Build the neuroglancer layer description for one precomputed layer.

    Args:
        s3_layer_path: Path of the layer; opened with CloudVolume and parsed
            with S3Url.
        affine_matrix: Affine transform whose last column holds the x, y, z
            translations in microns, or None for the identity. Expected to
            have the same 4x4 shape as the identity default. The argument is
            copied, so the caller's matrix is left untouched.

    Returns:
        dict describing the layer (type, source url and transform, shader
        settings, blend mode and name).
    """
    vol = CloudVolume(s3_layer_path)
    s3_url = S3Url(s3_layer_path)

    # this is in units of m
    output_resolution = np.array([
        minimum_ngl_json['dimensions']['x'][0],
        minimum_ngl_json['dimensions']['y'][0],
        minimum_ngl_json['dimensions']['z'][0],
    ])

    if affine_matrix is None:
        affine_matrix = np.eye(4)
    else:
        # Work on a float copy: dividing in place would silently rescale the
        # caller's matrix and fails outright for integer matrices.
        affine_matrix = np.array(affine_matrix, dtype=float)
        # convert translations from microns to voxels
        # (output resolution is converted from m to um)
        affine_matrix[:3, -1] /= output_resolution * 1e6

    # Volumes in this bucket are addressed through the dlab-colm host.
    if s3_url.bucket == 'colm-precomputed-volumes':
        url = f'precomputed://https://dlab-colm.neurodata.io/{s3_url.key}'
    else:
        url = f'precomputed://{s3_layer_path}'

    layer_data = {
        'type': vol.layer_type,
        'source': {
            'url': url,
            "transform": {
                # last column here is x, y, z translations respectively
                "matrix": affine_matrix[:3, :].tolist(),
                "outputDimensions": copy.deepcopy(minimum_ngl_json['dimensions'])
            },
        },
        'tab': 'source',
        'shader': '#uicontrol vec3 color color(default="white")\n#uicontrol float min slider(default=0, min=0, max=1, step=0.001)\n#uicontrol float max slider(default=1, min=0, max=1, step=0.001)\n#uicontrol float brightness slider(default=0, min=-1, max=1, step=0.1)\n#uicontrol float contrast slider(default=0, min=-3, max=3, step=0.1)\n\nfloat scale(float x) {\n return (x - min) / (max - min);\n}\n\nvoid main() {\n emitRGB(\n color * vec3(\n scale(\n toNormalized(getDataValue()))\n + brightness) * exp(contrast)\n );\n}',
        'shaderControls': {
            'max': 0.005
        },
        'blend': 'default',
        # the layer is named after the last component of the key
        'name': s3_url.key.split('/')[-1]
    }
    return layer_data
def register(input_s3_path, output_s3_path, log_s3_path, orientation, fixed_scale, translation, rotation):
    """Register a downsampled channel to the atlas with the MATLAB script.

    Args:
        input_s3_path: S3 path of the channel to register. The last two
            components of its key are used as experiment and channel name.
        output_s3_path: Not referenced in the body of this function.
        log_s3_path: If truthy, the registration output directory is synced
            to this S3 path.
        orientation: Passed to get_affine_matrix together with the
            module-level atlas_orientation.
        fixed_scale: Passed to get_affine_matrix and to the MATLAB script.
        translation: Passed to get_affine_matrix.
        rotation: Passed to get_affine_matrix.
    """
    print(input_s3_path)

    # The key is expected to end in .../<experiment>/<channel>.
    key_parts = S3Url(input_s3_path).key.split('/')
    channel = key_parts[-1]
    exp = key_parts[-2]

    # All local files live under the home directory.
    base_path = os.path.expanduser('~/')
    registration_prefix = f'{base_path}/{exp}_{channel}_registration/'
    target_name = f'{base_path}/autofluorescence_data.tif'

    # Fetch the downsampled data that will be registered.
    print("downloading data for registration...")
    voxel_size = download_data(input_s3_path, target_name)

    # Fetch the high resolution atlas labels unless they are already on disk.
    atlas_labels_path = os.path.expanduser(
        '~/CloudReg/registration/atlases/ara_annotation_10um.tif')
    if not os.path.exists(atlas_labels_path):
        download_data(ara_annotation_data_link(10), atlas_labels_path, desired_resolution=10000)

    # Initial affine transformation for the data.
    initial_affine = get_affine_matrix(
        translation,
        rotation,
        atlas_orientation,
        orientation,
        fixed_scale,
        ara_average_data_link(100),
    )

    # Rows are joined with '; ' and the values within a row with ', '.
    affine_string = '; '.join(
        ', '.join(str(value) for value in row) for row in initial_affine
    )

    matlab_registration_command = f''' matlab -nodisplay -nosplash -nodesktop -r \"base_path=\'{base_path}\';target_name=\'{target_name}\';registration_prefix=\'{registration_prefix}\';dxJ0={voxel_size};fixed_scale={fixed_scale};initial_affine=[{affine_string}];run(\'~/CloudReg/registration/registration_script_mouse_GN.m\')\" '''
    print(matlab_registration_command)
    subprocess.run(shlex.split(matlab_registration_command))

    # Save the results to S3 when a log path was given.
    if log_s3_path:
        aws_cli(['s3', 'sync', registration_prefix, log_s3_path])
def get_layer_json(s3_layer_path, affine_matrix, output_resolution):
    """Build the neuroglancer layer description for one precomputed layer.

    Args:
        s3_layer_path: Path of the layer; opened with CloudVolume and parsed
            with S3Url.
        affine_matrix: Affine transform whose last column holds the x, y, z
            translations in microns, or None for the identity. Expected to
            have the same 4x4 shape as the identity default. The argument is
            copied, so the caller's matrix is left untouched.
        output_resolution: Output resolution in meters, one value per axis.
            Also passed unchanged to get_output_dimensions_json.

    Returns:
        dict describing the layer (type, source url and transform, shader
        settings, blend mode and name).
    """
    vol = CloudVolume(s3_layer_path)
    s3_url = S3Url(s3_layer_path)

    if affine_matrix is None:
        affine_matrix = np.eye(4)
    else:
        # Work on a float copy: dividing in place would silently rescale the
        # caller's matrix and fails outright for integer matrices.
        affine_matrix = np.array(affine_matrix, dtype=float)
        # convert translations from microns to voxels
        # (output resolution is converted from m to um)
        resolution_um = np.asarray(output_resolution, dtype=float) * 1e6
        affine_matrix[:3, -1] /= resolution_um

    # Volumes in this bucket are addressed through the dlab-colm host.
    if s3_url.bucket == "colm-precomputed-volumes":
        url = f"precomputed://https://dlab-colm.neurodata.io/{s3_url.key}"
    else:
        url = f"precomputed://{s3_layer_path}"

    layer_data = {
        "type": vol.layer_type,
        "source": {
            "url": url,
            "transform": {
                # last column here is x, y, z translations respectively
                "matrix": affine_matrix[:3, :].tolist(),
                "outputDimensions": get_output_dimensions_json(output_resolution),
            },
        },
        "tab": "source",
        "shader": '#uicontrol vec3 color color(default="white")\n#uicontrol float min slider(default=0, min=0, max=1, step=0.001)\n#uicontrol float max slider(default=1, min=0, max=1, step=0.001)\n#uicontrol float brightness slider(default=0, min=-1, max=1, step=0.1)\n#uicontrol float contrast slider(default=0, min=-3, max=3, step=0.1)\n\nfloat scale(float x) {\n return (x - min) / (max - min);\n}\n\nvoid main() {\n emitRGB(\n color * vec3(\n scale(\n toNormalized(getDataValue()))\n + brightness) * exp(contrast)\n );\n}',
        "shaderControls": {
            "max": 0.005
        },
        "blend": "default",
        # the layer is named after the last component of the key
        "name": s3_url.key.split("/")[-1],
    }
    return layer_data
def download_raw_data(in_bucket_path, channel, outdir, log_s3_path=None):
    """Download all tiles of one channel in parallel.

    Args:
        in_bucket_path: S3 path under which the tiles are stored; leading
            and trailing slashes are stripped before it is parsed.
        channel: Channel whose tiles are listed and downloaded.
        outdir: Local directory handed to download_tiles as the target.
        log_s3_path: Accepted for interface compatibility; not used here.
    """
    input_s3_url = S3Url(in_bucket_path.strip("/"))
    in_bucket_name = input_s3_url.bucket
    in_path = input_s3_url.key
    total_n_jobs = cpu_count()

    # get list of all tiles to download for the given channel
    all_files = get_list_of_files_to_process(in_bucket_name, in_path, channel)

    # Split the files into one chunk per task. The chunks are materialized
    # so that the progress bar total matches the number of tasks that are
    # actually submitted, which can be smaller than the number of jobs.
    files_per_proc = math.ceil(len(all_files) / total_n_jobs) + 1
    work = list(chunks(all_files, files_per_proc))

    with tqdm_joblib(tqdm(desc="Downloading tiles", total=len(work))):
        Parallel(n_jobs=total_n_jobs, verbose=10)(
            delayed(download_tiles)(files, in_bucket_name, outdir)
            for files in work
        )
def register(
    input_s3_path,
    output_s3_path,
    log_s3_path,
    orientation,
    fixed_scale,
    translation,
    rotation,
    missing_data_correction,
    grid_correction,
    bias_correction,
    regularization,
    num_iterations,
):
    """Register a downsampled channel to the atlas and upload the result.

    Args:
        input_s3_path: S3 path of the channel to register. The last two
            components of its key are used as experiment and channel name.
        output_s3_path: S3 path to which the deformed high resolution atlas
            labels are ingested.
        log_s3_path: If truthy, the registration output directory is synced
            to this S3 path.
        orientation: Passed to get_affine_matrix together with the
            module-level atlas_orientation.
        fixed_scale: Passed to get_affine_matrix and to the MATLAB script.
        translation: Passed to get_affine_matrix.
        rotation: Passed to get_affine_matrix.
        missing_data_correction: Converted with int() and passed to the
            MATLAB script.
        grid_correction: Converted with int() and passed to the MATLAB
            script.
        bias_correction: Converted with int() and passed to the MATLAB
            script.
        regularization: Passed to the MATLAB script as sigmaR.
        num_iterations: Passed to the MATLAB script as niter.
    """
    print(input_s3_path)

    # The key is expected to end in .../<experiment>/<channel>.
    key_parts = S3Url(input_s3_path).key.split("/")
    channel = key_parts[-1]
    exp = key_parts[-2]

    # All local files live under the home directory.
    base_path = os.path.expanduser("~/")
    registration_prefix = f"{base_path}/{exp}_{channel}_registration/"
    target_name = f"{base_path}/autofluorescence_data.tif"

    # Fetch the downsampled data that will be registered.
    print("downloading data for registration...")
    voxel_size = download_data(input_s3_path, target_name)

    # Fetch the high resolution atlas labels unless they are already on disk.
    atlas_labels_path = os.path.expanduser(
        "~/CloudReg/registration/atlases/ara_annotation_10um.tif")
    if not os.path.exists(atlas_labels_path):
        download_data(ara_annotation_data_link(10), atlas_labels_path, desired_resolution=10000)

    # Initial affine transformation for the data.
    initial_affine = get_affine_matrix(
        translation,
        rotation,
        atlas_orientation,
        orientation,
        fixed_scale,
        ara_average_data_link(100),
    )

    # Rows are joined with "; " and the values within a row with ", ".
    affine_string = "; ".join(
        ", ".join(str(value) for value in row) for row in initial_affine
    )

    matlab_registration_command = f""" matlab -nodisplay -nosplash -nodesktop -r \"niter={num_iterations};sigmaR={regularization};missing_data_correction={int(missing_data_correction)};grid_correction={int(grid_correction)};bias_correction={int(bias_correction)};base_path=\'{base_path}\';target_name=\'{target_name}\';registration_prefix=\'{registration_prefix}\';dxJ0={voxel_size};fixed_scale={fixed_scale};initial_affine=[{affine_string}];run(\'~/CloudReg/registration/registration_script_mouse_GN.m\')\" """
    print(matlab_registration_command)
    subprocess.run(shlex.split(matlab_registration_command))

    # Save the results to S3 when a log path was given.
    if log_s3_path:
        aws_cli(["s3", "sync", registration_prefix, log_s3_path])

    # Upload the high resolution deformed atlas labels to S3.
    deformed_labels = f"{registration_prefix}/downloop_2_labels_to_target_highres.img"
    ingest_image_stack(output_s3_path, voxel_size, deformed_labels, "img", "uint64")

    # Print a link for visualizing the results at 5 microns.
    viz_link = create_viz_link(
        [input_s3_path, output_s3_path],
        output_resolution=np.array([5] * 3) / 1e6,
    )
    banner = "###################"
    print(banner)
    print(f"VIZ LINK: {viz_link}")
    print(banner)
def colm_pipeline(
    input_s3_path,
    output_s3_path,
    channel_of_interest,
    autofluorescence_channel,
    raw_data_path,
    stitched_data_path,
    log_s3_path=None,
):
    """Download, stitch, bias correct and upload one channel of COLM data.

    Args:
        input_s3_path: S3 path to raw COLM data. Should be of the form
            s3://<bucket>/<experiment>
        output_s3_path: S3 path to store the precomputed volume. Should be
            of the form s3://<bucket>/<path_to_precomputed>
        channel_of_interest: Channel number to operate on. Should be a
            single integer.
        autofluorescence_channel: Autofluorescence channel number. Not
            referenced in the body of this function.
        raw_data_path: Local path where corrected raw data will be stored.
        stitched_data_path: Local path where stitched slices will be stored.
        log_s3_path: S3 path at which pipeline intermediates can be stored,
            including the bias correction tile and the xml files from
            Terastitcher.
    """
    source_url = S3Url(input_s3_path.strip("/"))
    # Constructed as in the original flow; the result is not used afterwards.
    S3Url(output_s3_path.strip("/"))

    # Fetch the raw tiles of this channel from the VW0 folder.
    download_raw_data(
        f"{source_url.url}/VW0/",
        channel_of_interest,
        raw_data_path,
        log_s3_path=log_s3_path,
    )

    # Alignments are only computed when none could be fetched from the log
    # path, and then only for channel 0.
    fetched_alignments = download_terastitcher_files(log_s3_path, raw_data_path)
    if not fetched_alignments:
        if channel_of_interest == 0:
            run_terastitcher(
                raw_data_path,
                stitched_data_path,
                input_s3_path,
                log_s3_path=log_s3_path,
                compute_only=True,
            )

    # Bias correct every tile; the bias tile is saved to log_s3_path.
    correct_raw_data(raw_data_path, channel_of_interest, log_s3_path=log_s3_path)

    # Stitch the corrected tiles with the available alignments.
    stitch_metadata = run_terastitcher(
        raw_data_path,
        stitched_data_path,
        input_s3_path,
        log_s3_path=log_s3_path,
        stitch_only=True,
    )

    # Downsample and upload the stitched data, read from the first RES*
    # entry found under the stitched data path.
    resolution_dir = glob(f"{stitched_data_path}/RES*")[0]
    voxel_size = np.array(stitch_metadata["voxel_size"])
    create_precomputed_volume(resolution_dir, voxel_size, output_s3_path)

    # Whole brain bias correction. The corrected data overwrites the volume
    # that was just uploaded so the data is not replicated (higher S3 cost).
    correct_stitched_data(output_s3_path, output_s3_path)

    # Print a link for visualizing the data at 5 microns.
    viz_link = create_viz_link(
        [output_s3_path],
        output_resolution=np.array([5] * 3) / 1e6,
    )
    banner = "###################"
    print(banner)
    print(f"VIZ LINK: {viz_link}")
    print(banner)
def correct_raw_data(raw_data_path, channel, subsample_factor=2, log_s3_path=None, background_correction=True):
    """Bias correct all raw tiles of a channel in place.

    A bias tile is loaded from raw_data_path if one was saved by an earlier
    run. Otherwise it is estimated from the mean of a subsample of the tiles
    and saved next to the raw data. Every tile is then corrected in parallel
    and written back to raw_data_path.

    Args:
        raw_data_path: Local directory searched for tiles with the pattern
            <raw_data_path>/*/*.tiff; also the output directory.
        channel: Channel number, used in the name of the bias tile file.
        subsample_factor: Every subsample_factor-th tile is used to compute
            the mean tile.
        log_s3_path: If truthy, the bias tile is also uploaded to this S3
            path.
        background_correction: If True, a background value is computed from
            the raw data, subtracted from the mean tile and handed to the
            per-tile correction.

    Raises:
        ValueError: If the bias tile has to be computed but no tiles were
            found.
    """
    total_n_jobs = cpu_count()
    # overwrite existing raw data with corrected data
    outdir = raw_data_path

    # get list of all tiles to correct for given channel
    all_files = np.sort(glob.glob(f'{raw_data_path}/*/*.tiff'))

    # Always bind background_val so the final correction step can be called
    # when background correction is disabled.
    # NOTE(review): assumes correct_tiles treats None as "no background
    # subtraction" - confirm against its implementation.
    background_val = get_background_value(raw_data_path) if background_correction else None

    bias_path = f'{outdir}/CHN0{channel}_bias.tiff'
    if os.path.exists(bias_path):
        bias = tf.imread(bias_path)
    else:
        # subsample tiles
        files_cb = all_files[::subsample_factor]
        num_files = len(files_cb)
        if num_files == 0:
            raise ValueError(
                f'no tiles found under {raw_data_path}; cannot compute bias tile')

        # compute running sums in parallel
        tiles_per_chunk = math.ceil(num_files / total_n_jobs) + 1
        sums = Parallel(total_n_jobs, verbose=10)(
            delayed(sum_tiles)(f) for f in chunks(files_cb, tiles_per_chunk))

        # add up the partial sums and divide by the number of tiles
        mean_tile = np.squeeze(np.sum(np.stack(sums, axis=2), axis=2)) / num_files
        if background_correction:
            # subtract background out from bias correction
            mean_tile -= background_val
        mean_tile = sitk.GetImageFromArray(mean_tile)

        # get the bias correction tile using N4ITK
        bias = sitk.GetArrayFromImage(get_bias_field(mean_tile, scale=1.0))

        # save bias tile to local directory
        tf.imsave(bias_path, bias.astype('float32'))

    # save bias tile to S3
    if log_s3_path:
        s3 = boto3.resource('s3')
        img = Image.fromarray(bias)
        fp = BytesIO()
        img.save(fp, format='TIFF')
        # reset pointer to beginning of file
        fp.seek(0)
        log_s3_url = S3Url(log_s3_path.strip('/'))
        bias_key = f'{log_s3_url.key}/CHN0{channel}_bias.tiff'
        s3.Object(log_s3_url.bucket, bias_key).upload_fileobj(fp)

    # Correct all the files and save them. The chunks are materialized so
    # that the progress bar total matches the number of submitted tasks.
    files_per_proc = math.ceil(len(all_files) / total_n_jobs) + 1
    work = list(chunks(all_files, files_per_proc))
    with tqdm_joblib(tqdm(desc="Correcting tiles", total=len(work))):
        Parallel(n_jobs=total_n_jobs, verbose=10)(
            delayed(correct_tiles)(files, outdir, bias, background_val)
            for files in work
        )