def load_manifest(self, file_name):
    '''Read a keyed collection of path specifications.

    Parameters
    ----------
    file_name : string
        path to the manifest file

    Returns
    -------
    Manifest
    '''
    # PEP 8: compare to None with `is not`, not `!=`
    if file_name is not None:
        if not os.path.exists(file_name):
            # make the directory if it doesn't exist already
            dirname = os.path.dirname(file_name)
            if dirname:
                # guard: dirname is '' for a bare file name, and
                # mkdir('') would fail
                Manifest.safe_mkdir(dirname)

            self.build_manifest(file_name)

        self.manifest = Manifest(
            ju.read(file_name)['manifest'], os.path.dirname(file_name))
    else:
        self.manifest = None
def load_manifest(self, file_name):
    '''Read a keyed collection of path specifications.

    Parameters
    ----------
    file_name : string
        path to the manifest file

    Returns
    -------
    Manifest
    '''
    if file_name is None:
        self.manifest = None
        return

    if not os.path.exists(file_name):
        # create the containing directory (if any) before building a
        # default manifest at this path
        parent = os.path.dirname(file_name)
        if parent:
            Manifest.safe_mkdir(parent)
        self.build_manifest(file_name)

    base_dir = os.path.dirname(file_name)
    self.manifest = Manifest(ju.read(file_name)['manifest'], base_dir)
def save_grand_averages(grand_up, grand_down, t, storage_directory):
    """Save capacitance check grand averages to local storage

    Need to save to separate files so that they can be loaded by
    NEURON fitting scripts

    Parameters
    ----------
    grand_up, grand_down : array-like
        Series of voltages responses to positive (`grand_up`) and negative
        (`grand_down`) current pulses
    t : array-like
        Time values for `grand_up` and `grand_down`
    storage_directory : str
        Path to storage directory for files

    Returns
    -------
    upfile, downfile : str
        Paths to the saved files
    """
    Manifest.safe_mkdir(storage_directory)

    out_paths = []
    for base_name, trace in (("upbase.dat", grand_up),
                             ("downbase.dat", grand_down)):
        # two-column text files: time in column 0, voltage in column 1
        path = os.path.join(storage_directory, base_name)
        with open(path, 'w') as stream:
            np.savetxt(stream, np.column_stack((t, trace)))
        out_paths.append(path)

    return out_paths[0], out_paths[1]
def write_volume(volume, name, prefix=None, specify_resolution=None,
                 extension='.nrrd', paths=None):
    """Write a SimpleITK volume to disk, building the output path from an
    optional prefix directory, an optional resolution suffix, and the file
    extension. The origin is reset to (0, 0, 0) before writing; the written
    path is appended to `paths` when a list is supplied."""
    path = name if prefix is None else os.path.join(prefix, name)

    if specify_resolution is not None:
        # collapse whole-valued floats (e.g. 25.0) to ints for a cleaner
        # suffix
        if isinstance(specify_resolution, (float, np.floating)) \
                and specify_resolution % 1.0 == 0:
            specify_resolution = int(specify_resolution)
        path = '{0}_{1}'.format(path, specify_resolution)

    path = path + extension
    logging.info('writing {0} volume to {1}'.format(name, path))

    Manifest.safe_make_parent_dirs(path)
    volume.SetOrigin([0, 0, 0])
    sitk.WriteImage(volume, str(path), True)

    if paths is not None:
        paths.append(path)
def get_structures(self, file_name=None):
    """ Read the list of adult mouse structures and return a Pandas DataFrame.

    Parameters
    ----------
    file_name: string
        File name to save/read the structures table.  If file_name is None,
        the file_name will be pulled out of the manifest.  If caching
        is disabled, no file will be saved. Default is None.
    """
    file_name = self.get_cache_path(file_name, self.STRUCTURES_KEY)

    if os.path.exists(file_name):
        # pd.DataFrame.from_csv was deprecated in pandas 0.21 and later
        # removed; read_csv with index_col=0 is the documented replacement.
        structures = pd.read_csv(file_name, index_col=0)
    else:
        structures = OntologiesApi(base_uri=self.api.api_url).get_structures(1)
        structures = pd.DataFrame(structures)

        if self.cache:
            Manifest.safe_make_parent_dirs(file_name)
            structures.to_csv(file_name)

    # keep 'id' both as index and as a regular column
    structures.set_index(["id"], inplace=True, drop=False)
    return structures
def plot_negative_baselines(raw_traces, demix_traces, mask_array,
                            roi_ids_mask, plot_dir, ext='png'):
    """Plot every ROI whose demixed trace has a negative baseline and
    return the indices of ROIs whose masks overlap those, plus ROIs with
    zero baselines.

    Parameters
    ----------
    raw_traces, demix_traces : array-like, shape (N rois, T samples)
        Raw and demixed fluorescence traces.
    mask_array : array-like, shape (N rois, x, y)
        Per-ROI masks.
    roi_ids_mask : sequence
        ROI ids used to name the output image files.
    plot_dir : str
        Directory where plot images are written (created on demand).
    ext : str
        Image file extension for the saved plots. Default 'png'.

    Returns
    -------
    list
        Indices of overlapping and zero-baseline ROIs.
    """
    N, T = raw_traces.shape
    _, x, y = mask_array.shape

    logging.debug("finding negative baselines")
    neg_inds = find_negative_baselines(demix_traces)[0]
    overlap_inds = set()

    logging.debug("detected negative baselines: %s", str(neg_inds))

    for roi_ind in neg_inds:
        # mkdir inside the loop: the directory is only created when there
        # is at least one negative-baseline ROI to plot
        Manifest.safe_mkdir(plot_dir)

        save_file = os.path.join(
            plot_dir, str(roi_ids_mask[roi_ind]) + '_negative.' + ext)
        plot_traces(raw_traces[roi_ind], demix_traces[roi_ind],
                    roi_ids_mask[roi_ind], roi_ind, save_file)

        ''' plot overlapping masks '''
        save_file = os.path.join(
            plot_dir, str(roi_ids_mask[roi_ind]) + '_negative_masks.' + ext)
        roi_overlap_inds = plot_overlap_masks_lengthOne(roi_ind, mask_array,
                                                        save_file)
        overlap_inds.update(roi_overlap_inds)

    # zero-baseline ROIs are flagged as overlapping too (not plotted)
    zero_inds = find_zero_baselines(demix_traces)[0]
    logging.debug("detected zero baselines: %s", str(zero_inds))
    overlap_inds.update(zero_inds)

    return list(overlap_inds)
def load_manifest(self, file_name, version=None):
    '''Read a keyed collection of path specifications.

    Parameters
    ----------
    file_name : string
        path to the manifest file
    version : optional
        expected manifest version; a mismatch raises ManifestVersionError

    Returns
    -------
    Manifest
    '''
    if file_name is not None:
        if not os.path.exists(file_name):
            # make the directory if it doesn't exist already
            dirname = os.path.dirname(file_name)
            if dirname:
                Manifest.safe_mkdir(dirname)

            self.build_manifest(file_name)

        try:
            self.manifest = Manifest(
                ju.read(file_name)['manifest'],
                os.path.dirname(file_name),
                version=version)
        except ManifestVersionError as e:
            # re-raise with a user-actionable message; keep the version
            # fields so callers can still inspect them
            raise ManifestVersionError(("Your manifest file (%s) is out of date" +
                                        " (version '%s' vs '%s'). Please remove this file" +
                                        " and it will be regenerated for you the next"
                                        " time you instantiate this class.") % (file_name, e.found_version, e.version),
                                       e.version,
                                       e.found_version)
    else:
        self.manifest = None
def save_reconstruction(self, specimen_id, file_name):
    """ Save the morphological reconstruction of a cell as an SWC file.

    Parameters
    ----------
    specimen_id: int
        ID of the specimen, from the Specimens database model in the Allen
        Institute API.

    file_name: str
        Path to save the SWC file.

    Raises
    ------
    Exception
        If the specimen has no reconstruction on record.
    """
    Manifest.safe_make_parent_dirs(file_name)

    criteria = '[id$eq%d],neuron_reconstructions(well_known_files)' % specimen_id
    includes = 'neuron_reconstructions(well_known_files(well_known_file_type[name$eq\'%s\']))' % self.SWC_FILE_TYPE

    results = self.model_query('Specimen',
                               criteria=criteria,
                               include=includes,
                               num_rows='all')

    try:
        file_url = results[0]['neuron_reconstructions'][
            0]['well_known_files'][0]['download_link']
    except (IndexError, KeyError, TypeError):
        # was a bare `except:` — narrowed to the lookup failures that mean
        # "no reconstruction record", so e.g. KeyboardInterrupt propagates
        raise Exception("Specimen %d has no reconstruction" % specimen_id)

    self.retrieve_file_over_http(self.api_url + file_url, file_name)
def display_features(qc_fig_dir, data_set, feature_data):
    """ Build QC figure pages for a data set.

    Parameters
    ----------
    qc_fig_dir: str
        directory name for storing html pages
    data_set: NWB data set
    feature_data: dict
        cell and sweep features

    Returns
    -------
    """
    # start from a clean slate: drop figures left over from a previous run
    if os.path.exists(qc_fig_dir):
        logging.warning("Removing existing qc figures directory: %s",
                        qc_fig_dir)
        shutil.rmtree(qc_fig_dir)

    image_dir = os.path.join(qc_fig_dir, "img")
    for directory in (qc_fig_dir, image_dir):
        Manifest.safe_mkdir(directory)

    logging.info("Saving figures")
    make_sweep_page(data_set, qc_fig_dir)
    make_cell_page(data_set, feature_data, qc_fig_dir)
def save_ephys_data(self, specimen_id, file_name):
    """ Save the electrophysology recordings for a cell as an NWB file.

    Parameters
    ----------
    specimen_id: int
        ID of the specimen, from the Specimens database model in the Allen
        Institute API.

    file_name: str
        Path to save the NWB file.
    """
    Manifest.safe_make_parent_dirs(file_name)

    criteria = '[id$eq%d],ephys_result(well_known_files(well_known_file_type[name$eq%s]))' % (
        specimen_id, self.NWB_FILE_TYPE)
    includes = 'ephys_result(well_known_files(well_known_file_type))'

    records = self.model_query('Specimen',
                               criteria=criteria,
                               include=includes,
                               num_rows='all')

    try:
        file_url = records[0]['ephys_result']['well_known_files'][0]['download_link']
    except Exception:
        raise Exception("Specimen %d has no ephys data" % specimen_id)

    self.retrieve_file_over_http(self.api_url + file_url, file_name)
def save_reconstruction_markers(self, specimen_id, file_name):
    """ Save the marker file for the morphological reconstruction of a
    cell.  These are comma-delimited files indicating points of interest
    in a reconstruction (truncation points, early tracing termination,
    etc).

    Parameters
    ----------
    specimen_id: int
        ID of the specimen, from the Specimens database model in the Allen
        Institute API.

    file_name: str
        Path to save the marker file.

    Raises
    ------
    LookupError
        If the specimen has no marker file on record.
    """
    Manifest.safe_make_parent_dirs(file_name)

    criteria = '[id$eq%d],neuron_reconstructions(well_known_files)' % specimen_id
    includes = 'neuron_reconstructions(well_known_files(well_known_file_type[name$eq\'%s\']))' % self.MARKER_FILE_TYPE

    results = self.model_query('Specimen',
                               criteria=criteria,
                               include=includes,
                               num_rows='all')

    try:
        file_url = results[0]['neuron_reconstructions'][
            0]['well_known_files'][0]['download_link']
    except (IndexError, KeyError, TypeError):
        # was a bare `except:` — narrowed to the lookup failures that mean
        # "no marker record"
        raise LookupError("Specimen %d has no marker file" % specimen_id)

    self.retrieve_file_over_http(self.api_url + file_url, file_name)
def debug(container_id, local=False, plots=None):
    """Prepare per-experiment input/output json files for a container and
    launch the observatory container thumbnail module (locally or via PBS)."""
    SCRIPT = "/data/informatics/CAM/analysis/allensdk/allensdk/internal/pipeline_modules/run_observatory_container_thumbnails.py"
    SDK_PATH = "/data/informatics/CAM/analysis/allensdk/"
    OUTPUT_DIR = "/data/informatics/CAM/analysis/containers"

    container_dir = os.path.join(OUTPUT_DIR, str(container_id))

    input_data = []
    for exp in get_container_info(container_id):
        exp_data = robsth.get_input_data(exp['id'])
        out_dir = exp_data["output_directory"]

        exp_input_json = os.path.join(out_dir, "input.json")
        exp_output_json = os.path.join(out_dir, "output.json")
        input_data.append(dict(input_json=exp_input_json,
                               output_json=exp_output_json))

        # write the per-experiment input json before launching the module
        Manifest.safe_make_parent_dirs(exp_input_json)
        ju.write(exp_input_json, exp_data)

    extra_args = ['--types=' + ','.join(plots)] if plots else None

    run_module(SCRIPT,
               input_data,
               container_dir,
               sdk_path=SDK_PATH,
               pbs=dict(vmem=32,
                        job_name="cthumbs_%d" % container_id,
                        walltime="10:00:00"),
               local=local,
               optional_args=extra_args)
def get_template_volume(self, file_name=None):
    """ Read the template volume.  Download it first if it doesn't exist.

    Parameters
    ----------
    file_name: string
        File name to store the template volume.  If it already exists,
        it will be read from this file.  If file_name is None, the
        file_name will be pulled out of the manifest.  Default is None.

    Returns
    -------
    tuple
        The template volume and metadata as returned by ``nrrd.read`` or
        ``download_template_volume``.
    """
    file_name = self.get_cache_path(file_name, self.TEMPLATE_KEY,
                                    self.resolution)

    if file_name is None:
        # fixed: message previously said "annotation volume" although this
        # method handles the template volume
        raise Exception("No save file provided for template volume.")

    if os.path.exists(file_name):
        template, info = nrrd.read(file_name)
    else:
        Manifest.safe_make_parent_dirs(file_name)
        template, info = self.api.download_template_volume(self.resolution,
                                                           file_name)

    return template, info
def get_injection_fraction(self, experiment_id, file_name=None):
    """ Read an injection fraction volume for a single experiment.  Download
    it first if it doesn't exist.  Injection fraction is the proportion
    of pixels in the injection site in a grid voxel in [0,1].

    Parameters
    ----------
    experiment_id: int
        ID of the experiment to download/read.  This corresponds to
        section_data_set_id in the API.

    file_name: string
        File name to store the template volume.  If it already exists,
        it will be read from this file.  If file_name is None, the
        file_name will be pulled out of the manifest.  Default is None.
    """
    cache_path = self.get_cache_path(file_name,
                                     self.INJECTION_FRACTION_KEY,
                                     experiment_id,
                                     self.resolution)

    if cache_path is None:
        raise Exception("No file name to save volume.")

    # download only on cache miss
    if not os.path.exists(cache_path):
        Manifest.safe_make_parent_dirs(cache_path)
        self.api.download_injection_fraction(cache_path, experiment_id,
                                             self.resolution)

    return nrrd.read(cache_path)
def get_data_mask(self, experiment_id, file_name=None):
    """ Read a data mask volume for a single experiment. Download it
    first if it doesn't exist.  Data mask is a binary mask of voxels
    that have valid data.  Only use valid data in analysis!

    Parameters
    ----------
    experiment_id: int
        ID of the experiment to download/read.  This corresponds to
        section_data_set_id in the API.

    file_name: string
        File name to store the template volume.  If it already exists,
        it will be read from this file.  If file_name is None, the
        file_name will be pulled out of the manifest.  Default is None.
    """
    file_name = self.get_cache_path(file_name,
                                    self.DATA_MASK_KEY,
                                    experiment_id,
                                    self.resolution)

    if file_name is None:
        raise Exception("No file name to save volume.")

    if not os.path.exists(file_name):
        # fixed for consistency with the other cache methods:
        # safe_make_parent_dirs tolerates bare file names, whereas
        # safe_mkdir(os.path.dirname(file_name)) receives '' in that case
        Manifest.safe_make_parent_dirs(file_name)
        self.api.download_data_mask(file_name, experiment_id,
                                    self.resolution)

    return nrrd.read(file_name)
def get_annotation_volume(self, file_name=None):
    """ Read the annotation volume.  Download it first if it doesn't exist.

    Parameters
    ----------
    file_name: string
        File name to store the annotation volume.  If it already exists,
        it will be read from this file.  If file_name is None, the
        file_name will be pulled out of the manifest.  Default is None.
    """
    file_name = self.get_cache_path(file_name,
                                    self.ANNOTATION_KEY,
                                    self.resolution)

    if file_name is None:
        raise Exception(
            "No save file name provided for annotation volume.")

    if os.path.exists(file_name):
        annotation, info = nrrd.read(file_name)
    else:
        # fixed for consistency with the other cache methods:
        # safe_make_parent_dirs tolerates bare file names, whereas
        # safe_mkdir(os.path.dirname(file_name)) receives '' in that case
        Manifest.safe_make_parent_dirs(file_name)
        annotation, info = self.api.download_annotation_volume(
            self.resolution, file_name)

    return annotation, info
def get_structures(self, file_name=None):
    """ Read the list of adult mouse structures and return a Pandas DataFrame.

    Parameters
    ----------
    file_name: string
        File name to save/read the structures table.  If file_name is None,
        the file_name will be pulled out of the manifest.  If caching
        is disabled, no file will be saved. Default is None.
    """
    file_name = self.get_cache_path(file_name, self.STRUCTURES_KEY)

    if os.path.exists(file_name):
        # pd.DataFrame.from_csv was deprecated in pandas 0.21 and later
        # removed; read_csv with index_col=0 is the documented replacement.
        structures = pd.read_csv(file_name, index_col=0)
    else:
        structures = OntologiesApi().get_structures(1)
        structures = pd.DataFrame(structures)

        if self.cache:
            # tolerates bare file names, unlike
            # safe_mkdir(os.path.dirname(file_name))
            Manifest.safe_make_parent_dirs(file_name)
            structures.to_csv(file_name)

    # keep 'id' both as index and as a regular column
    structures.set_index(['id'], inplace=True, drop=False)
    return structures
def get_experiment_structure_unionizes(self, experiment_id, file_name=None,
                                       is_injection=None, structure_ids=None,
                                       hemisphere_ids=None):
    """ Retrieve the structure unionize data for a specific experiment.
    Filter by structure, injection status, and hemisphere.

    Parameters
    ----------
    experiment_id: int
        ID of the experiment of interest.  Corresponds to
        section_data_set_id in the API.

    file_name: string
        File name to save/read the experiments list.  If file_name is None,
        the file_name will be pulled out of the manifest.  If caching
        is disabled, no file will be saved. Default is None.

    is_injection: boolean
        If True, only return unionize records that disregard non-injection
        pixels.  If False, only return unionize records that disregard
        injection pixels.  If None, return all records.  Default None.

    structure_ids: list
        Only return unionize records that are inside a specific set of
        structures.  If None, return all records. Default None.

    hemisphere_ids: list
        Only return unionize records that disregard pixels outside of a
        hemisphere. or set of hemispheres. Left = 1, Right = 2, Both = 3.
        If None, include all records [1, 2, 3].  Default None.
    """
    file_name = self.get_cache_path(file_name,
                                    self.STRUCTURE_UNIONIZES_KEY,
                                    experiment_id)

    if os.path.exists(file_name):
        # pd.DataFrame.from_csv was deprecated in pandas 0.21 and later
        # removed; read_csv with index_col=0 is the documented replacement.
        unionizes = pd.read_csv(file_name, index_col=0)
    else:
        unionizes = self.api.get_structure_unionizes([experiment_id])
        unionizes = pd.DataFrame(unionizes)

        # rename section_data_set_id column to experiment_id
        unionizes.columns = [
            'experiment_id' if c == 'section_data_set_id' else c
            for c in unionizes.columns
        ]

        if self.cache:
            # tolerates bare file names, unlike
            # safe_mkdir(os.path.dirname(file_name))
            Manifest.safe_make_parent_dirs(file_name)
            unionizes.to_csv(file_name)

    return self.filter_structure_unionizes(unionizes, is_injection,
                                           structure_ids, hemisphere_ids)
def save_ephys_data(self, specimen_id, file_name):
    """ Save the electrophysology recordings for a cell as an NWB file.

    Parameters
    ----------
    specimen_id: int
        ID of the specimen, from the Specimens database model in the Allen
        Institute API.

    file_name: str
        Path to save the NWB file.
    """
    Manifest.safe_make_parent_dirs(file_name)

    criteria = '[id$eq%d],ephys_result(well_known_files(well_known_file_type[name$eq%s]))' % (
        specimen_id, self.NWB_FILE_TYPE)
    includes = 'ephys_result(well_known_files(well_known_file_type))'

    matches = self.model_query('Specimen',
                               criteria=criteria,
                               include=includes,
                               num_rows='all')

    try:
        well_known_files = matches[0]['ephys_result']['well_known_files']
        file_url = well_known_files[0]['download_link']
    except Exception:
        raise Exception("Specimen %d has no ephys data" % specimen_id)

    self.retrieve_file_over_http(self.api_url + file_url, file_name)
def get_injection_fraction(self, experiment_id, file_name=None):
    """ Read an injection fraction volume for a single experiment.  Download
    it first if it doesn't exist.  Injection fraction is the proportion
    of pixels in the injection site in a grid voxel in [0,1].

    Parameters
    ----------
    experiment_id: int
        ID of the experiment to download/read.  This corresponds to
        section_data_set_id in the API.

    file_name: string
        File name to store the template volume.  If it already exists,
        it will be read from this file.  If file_name is None, the
        file_name will be pulled out of the manifest.  Default is None.
    """
    file_name = self.get_cache_path(file_name,
                                    self.INJECTION_FRACTION_KEY,
                                    experiment_id,
                                    self.resolution)

    if file_name is None:
        raise Exception("No file name to save volume.")

    if not os.path.exists(file_name):
        # fixed for consistency with the other variant of this method:
        # safe_make_parent_dirs tolerates bare file names, whereas
        # safe_mkdir(os.path.dirname(file_name)) receives '' in that case
        Manifest.safe_make_parent_dirs(file_name)
        self.api.download_injection_fraction(file_name, experiment_id,
                                             self.resolution)

    return nrrd.read(file_name)
def from_file_name(cls, file_name, cache=True, **kwargs):
    '''Alternative constructor using cache path file_name.

    Parameters
    ----------
    file_name : string
        Path where storage_directories will be saved.
    **kwargs
        Keyword arguments to be supplied to __init__

    Returns
    -------
    cls : instance of GridDataApiPrerelease
    '''
    if not os.path.exists(file_name):
        # no cached copy: query for the directories, optionally persisting
        # the result for next time
        storage_directories = _get_grid_storage_directories(
            cls.GRID_DATA_DIRECTORY)
        if cache:
            Manifest.safe_make_parent_dirs(file_name)
            json_utilities.write(file_name, storage_directories)
    else:
        storage_directories = json_utilities.read(file_name)

    return cls(storage_directories, **kwargs)
def get_data_mask(self, experiment_id, file_name=None):
    """ Read a data mask volume for a single experiment. Download it
    first if it doesn't exist.  Data mask is a binary mask of voxels
    that have valid data.  Only use valid data in analysis!

    Parameters
    ----------
    experiment_id: int
        ID of the experiment to download/read.  This corresponds to
        section_data_set_id in the API.

    file_name: string
        File name to store the template volume.  If it already exists,
        it will be read from this file.  If file_name is None, the
        file_name will be pulled out of the manifest.  Default is None.
    """
    cache_path = self.get_cache_path(file_name,
                                     self.DATA_MASK_KEY,
                                     experiment_id,
                                     self.resolution)

    if cache_path is None:
        raise Exception("No file name to save volume.")

    # download only on cache miss
    if not os.path.exists(cache_path):
        Manifest.safe_make_parent_dirs(cache_path)
        self.api.download_data_mask(cache_path, experiment_id,
                                    self.resolution)

    return nrrd.read(cache_path)
def load_manifest(self, file_name, version=None):
    '''Read a keyed collection of path specifications.

    Parameters
    ----------
    file_name : string
        path to the manifest file
    version : optional
        expected manifest version; a mismatch raises ManifestVersionError

    Returns
    -------
    Manifest
    '''
    if file_name is not None:
        if not os.path.exists(file_name):
            # make the directory if it doesn't exist already
            dirname = os.path.dirname(file_name)
            if dirname:
                Manifest.safe_mkdir(dirname)

            self.build_manifest(file_name)

        try:
            self.manifest = Manifest(
                ju.read(file_name)['manifest'],
                os.path.dirname(file_name),
                version=version)
        except ManifestVersionError as e:
            # e.outdated distinguishes a too-old manifest (True), one
            # written by a newer AllenSDK (False), or unknown (None)
            if e.outdated is True:
                intro = "is out of date"
            elif e.outdated is False:
                intro = "was made with a newer version of the AllenSDK"
            elif e.outdated is None:
                intro = "version did not match the expected version"

            ref_url = "https://github.com/alleninstitute/allensdk/wiki"

            # re-raise with a user-actionable message, preserving the
            # version fields for callers
            raise ManifestVersionError(("Your manifest file (%s) %s" +
                                        " (its version is '%s', but" +
                                        " version '%s' is expected). " +
                                        " Please remove this file" +
                                        " and it will be regenerated for" +
                                        " you the next time you" +
                                        " instantiate this class." +
                                        " WARNING: There may be new data" +
                                        " files available that replace" +
                                        " the ones you already have" +
                                        " downloaded. Read the notes" +
                                        " for this release for more" +
                                        " details on what has changed" +
                                        " (%s).") % (file_name, intro,
                                                     e.found_version,
                                                     e.version, ref_url),
                                       e.version,
                                       e.found_version)

        # remember where the manifest was loaded from
        self.manifest_path = file_name
    else:
        self.manifest = None
def get_experiments(self, dataframe=False, file_name=None, cre=None,
                    injection_structure_ids=None):
    """ Read a list of experiments that match certain criteria.  If caching
    is enabled, this will save the whole (unfiltered) list of experiments to
    a file.

    Parameters
    ----------
    dataframe: boolean
        Return the list of experiments as a Pandas DataFrame.  If False,
        return a list of dictionaries.  Default False.

    file_name: string
        File name to save/read the structures table.  If file_name is None,
        the file_name will be pulled out of the manifest.  If caching
        is disabled, no file will be saved. Default is None.

    cre: boolean or list
        If True, return only cre-positive experiments.  If False, return only
        cre-negative experiments.  If None, return all experients. If list,
        return all experiments with cre line names in the supplied list.
        Default None.

    injection_structure_ids: list
        Only return experiments that were injected in the structures provided
        here.  If None, return all experiments.  Default None.
    """
    file_name = self.get_cache_path(file_name, self.EXPERIMENTS_KEY)

    if os.path.exists(file_name):
        experiments = json_utilities.read(file_name)
    else:
        experiments = self.api.experiment_source_search(
            injection_structures="root")

        # removing these elements because they are specific to a particular
        # resolution
        for record in experiments:
            for key in ("num-voxels", "injection-volume", "sum", "name"):
                del record[key]

        if self.cache:
            Manifest.safe_make_parent_dirs(file_name)
            json_utilities.write(file_name, experiments)

    # filter the read/downloaded list of experiments
    experiments = self.filter_experiments(experiments, cre,
                                          injection_structure_ids)

    if dataframe:
        experiments = pd.DataFrame(experiments)
        experiments.set_index(["id"], inplace=True, drop=False)

    return experiments
def save_grand_averages(grand_up, grand_down, t, storage_directory):
    """Save to local storage to be loaded by NEURON fitting scripts"""
    Manifest.safe_mkdir(storage_directory)

    upfile = os.path.join(storage_directory, "upbase.dat")
    downfile = os.path.join(storage_directory, "downbase.dat")

    # each file holds two columns: time and trace
    for path, trace in ((upfile, grand_up), (downfile, grand_down)):
        with open(path, 'w') as stream:
            np.savetxt(stream, np.column_stack((t, trace)))

    return upfile, downfile
def save_qc_figures(qc_fig_dir, nwb_file, output_data, plot_cell_figures):
    """Regenerate the QC figure directory (sweep and cell pages) for an
    experiment, wiping any figures left over from a previous run."""
    if os.path.exists(qc_fig_dir):
        logging.warning("removing existing qc figures directory: %s",
                        qc_fig_dir)
        shutil.rmtree(qc_fig_dir)

    Manifest.safe_mkdir(qc_fig_dir)

    logging.debug("saving qc plot figures")
    plot_qc_figures.make_sweep_page(nwb_file, output_data, qc_fig_dir)
    plot_qc_figures.make_cell_page(nwb_file, output_data, qc_fig_dir,
                                   plot_cell_figures)
def build_plots(prefix, aspect, configs, output_dir, axes=None,
                transparent=False):
    """Generator yielding one output file name per plot configuration,
    with a figure of the configured pixel size held open while control is
    yielded to the caller."""
    Manifest.safe_mkdir(output_dir)

    for cfg in configs:
        height = cfg['height_px']
        width = int(height * aspect)
        file_name = os.path.join(output_dir, cfg["pattern"] % prefix)
        logging.debug("file: %s", file_name)

        with oplots.figure_in_px(width, height, file_name,
                                 transparent=transparent) as fig:
            # apply the per-config font size before the caller draws
            matplotlib.rcParams.update({'font.size': cfg['font_size']})
            yield file_name
def get_experiments(self, dataframe=False, file_name=None, cre=None,
                    injection_structure_ids=None, age=None, gender=None,
                    workflow_state=None, workflows=None, project_code=None):
    """Read a list of experiments.  If caching is enabled, this will save
    the whole (unfiltered) list of experiments to a file.

    Parameters
    ----------
    dataframe: boolean
        Return the list of experiments as a Pandas DataFrame.  If False,
        return a list of dictionaries.  Default False.

    file_name: string
        File name to save/read the structures table.  If file_name is None,
        the file_name will be pulled out of the manifest.  If caching
        is disabled, no file will be saved. Default is None.
    """
    cache_path = self.get_cache_path(file_name,
                                     self.EXPERIMENTS_PRERELEASE_KEY)

    if not os.path.exists(cache_path):
        experiments = self.api.get_experiments()
        if self.cache:
            Manifest.safe_make_parent_dirs(cache_path)
            json_utilities.write(cache_path, experiments)
    else:
        experiments = json_utilities.read(cache_path)

    # filter the read/downloaded list of experiments
    experiments = self.filter_experiments(experiments, cre,
                                          injection_structure_ids, age,
                                          gender, workflow_state, workflows,
                                          project_code)

    if dataframe:
        experiments = pd.DataFrame(experiments)
        experiments.set_index(['id'], inplace=True, drop=False)

    return experiments
def load_manifest(self, file_name, version=None):
    '''Read a keyed collection of path specifications.

    Parameters
    ----------
    file_name : string
        path to the manifest file
    version : optional
        expected manifest version; a mismatch raises ManifestVersionError

    Returns
    -------
    Manifest
    '''
    if file_name is not None:
        if not os.path.exists(file_name):
            # make the directory if it doesn't exist already
            dirname = os.path.dirname(file_name)
            if dirname:
                Manifest.safe_mkdir(dirname)

            self.build_manifest(file_name)

        try:
            self.manifest = Manifest(
                ju.read(file_name)['manifest'],
                os.path.dirname(file_name),
                version=version)
        except ManifestVersionError as e:
            # e.outdated distinguishes a too-old manifest (True), one
            # written by a newer AllenSDK (False), or unknown (None)
            if e.outdated is True:
                intro = "is out of date"
            elif e.outdated is False:
                intro = "was made with a newer version of the AllenSDK"
            elif e.outdated is None:
                intro = "version did not match the expected version"

            # re-raise with a user-actionable message, preserving the
            # version fields for callers
            raise ManifestVersionError(("Your manifest file (%s) %s" +
                                        " (its version is '%s', but version '%s' is expected). Please remove this file" +
                                        " and it will be regenerated for you the next" +
                                        " time you instantiate this class." +
                                        " WARNING: There may be new data files available that replace the ones you already have downloaded." +
                                        " Read the notes for this release for more details on what has changed" +
                                        " (https://github.com/alleninstitute/allensdk/wiki).") % (file_name, intro, e.found_version, e.version),
                                       e.version,
                                       e.found_version)

        # remember where the manifest was loaded from
        self.manifest_path = file_name
    else:
        self.manifest = None
def main():
    """Convert a cell metadata CSV and an HDF5 connectivity file into a
    VTK network file (and optionally a morphology VTK file)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('cells_csv', help='CSV containing cell metadata')
    parser.add_argument('connections_h5',
                        help='HDF5 file containing cell connectivity')
    parser.add_argument('network_vtk_file', help='.vtk output file')
    parser.add_argument('--manifest')
    parser.add_argument('--morphology_vtk_file')
    args = parser.parse_args()

    # read in the cell CSV
    with open(args.cells_csv, 'r') as csv_file:
        cells = list(csv.DictReader(csv_file))

    # read in the connections from the H5 file
    connections = Hdf5Util().read(args.connections_h5)

    # write out the results
    write_network_vtk(args.network_vtk_file, cells, connections)

    # morphology output is only produced when a manifest is supplied
    if args.manifest:
        config = ju.read(args.manifest)
        manifest = Manifest(config['manifest'],
                            relative_base_dir=os.path.dirname(args.manifest))
        write_morphology_vtk(args.morphology_vtk_file, cells, manifest)
def load_manifest(self, file_name, version=None):
    '''Read a keyed collection of path specifications.

    Parameters
    ----------
    file_name : string
        path to the manifest file
    version : optional
        expected manifest version; a mismatch raises ManifestVersionError

    Returns
    -------
    Manifest
    '''
    if file_name is not None:
        if not os.path.exists(file_name):
            # make the directory if it doesn't exist already
            dirname = os.path.dirname(file_name)
            if dirname:
                Manifest.safe_mkdir(dirname)

            self.build_manifest(file_name)

        try:
            self.manifest = Manifest(ju.read(file_name)['manifest'],
                                     os.path.dirname(file_name),
                                     version=version)
        except ManifestVersionError as e:
            # e.outdated distinguishes a too-old manifest (True), one
            # written by a newer AllenSDK (False), or unknown (None)
            if e.outdated is True:
                intro = "is out of date"
            elif e.outdated is False:
                intro = "was made with a newer version of the AllenSDK"
            elif e.outdated is None:
                intro = "version did not match the expected version"

            # re-raise with a user-actionable message, preserving the
            # version fields for callers
            raise ManifestVersionError((
                "Your manifest file (%s) %s" +
                " (its version is '%s', but version '%s' is expected). Please remove this file" +
                " and it will be regenerated for you the next"
                " time you instantiate this class.") % (file_name, intro, e.found_version, e.version),
                e.version,
                e.found_version)

        # remember where the manifest was loaded from
        self.manifest_path = file_name
    else:
        self.manifest = None
def save_ophys_experiment_data(self, ophys_experiment_id, file_name):
    """Download the NWB data file for an ophys experiment to file_name."""
    Manifest.safe_make_parent_dirs(file_name)

    data = self.template_query('brain_observatory_queries',
                               'ophys_experiment_data',
                               ophys_experiment_id=ophys_experiment_id)

    try:
        file_url = data[0]['download_link']
    except Exception:
        raise Exception("ophys experiment %d has no data file" %
                        ophys_experiment_id)

    self._log.warning(
        "Downloading ophys_experiment %d NWB. This can take some time." %
        ophys_experiment_id)

    self.retrieve_file_over_http(self.api_url + file_url, file_name)
def get_deformation_field(self, section_data_set_id, header_path=None, voxel_path=None):
    ''' Extract the local alignment parameters for this dataset. This a 3D
    vector image (3 components) describing a deformable local mapping from
    CCF voxels to this section data set's affine-aligned image stack.

    Parameters
    ----------
        section_data_set_id : int
            Download the deformation field for this data set
        header_path : str, optional
            If supplied, the deformation field header will be downloaded
            to this path.
        voxel_path : str, optiona
            If supplied, the deformation field voxels will be downloaded
            to this path.

    Returns
    -------
        numpy.ndarray :
            3D X 3 component vector array (origin 0, 0, 0; 25-micron
            isometric resolution) defining a deformable transformation
            from CCF-space to affine-transformed image space.
    '''
    # warn (don't fail) if this cache's resolution has no deformation field
    if self.resolution not in self.DFMFLD_RESOLUTIONS:
        warnings.warn(
            'deformation fields are only available at {} isometric resolutions, but this is a '\
            '{}-micron cache'.format(self.DFMFLD_RESOLUTIONS, self.resolution)
        )

    header_path = self.get_cache_path(header_path, self.DEFORMATION_FIELD_HEADER_KEY, section_data_set_id)
    voxel_path = self.get_cache_path(voxel_path, self.DEFORMATION_FIELD_VOXEL_KEY, section_data_set_id)

    # download only when either file is missing; both are re-fetched
    # together since the header references the voxel file
    if not (os.path.exists(header_path) and os.path.exists(voxel_path)):
        Manifest.safe_make_parent_dirs(header_path)
        Manifest.safe_make_parent_dirs(voxel_path)
        self.api.download_deformation_field(section_data_set_id, header_path=header_path, voxel_path=voxel_path)

    return sitk.GetArrayFromImage(sitk.ReadImage(str(
        header_path)))  # TODO the str call here is only necessary in 2.7
def to_manifest(self, manifest_path=None):
    """Build a Manifest object from this configuration.

    Parameters
    ----------
    manifest_path : str, optional
        If supplied, the built manifest is also written to this path as
        JSON (overwriting any existing file).

    Returns
    -------
    Manifest
    """
    b = self.build_manifest(manifest_path)
    m = Manifest(config=b.path_info)

    # PEP 8: compare to None with `is not`, not `!=`
    if manifest_path is not None:
        b.write_json_file(manifest_path, overwrite=True)

    return m
def get_structure_mask(self, structure_id, file_name=None, annotation_file_name=None):
    """ Read a 3D numpy array shaped like the annotation volume that has
    non-zero values where voxels belong to a particular structure.  This
    will take care of identifying substructures.

    Parameters
    ----------
    structure_id: int
        ID of a structure.

    file_name: string
        File name to store the structure mask.  If it already exists,
        it will be read from this file.  If file_name is None, the
        file_name will be pulled out of the manifest.  Default is None.

    annotation_file_name: string
        File name to store the annotation volume.  If it already exists,
        it will be read from this file.  If file_name is None, the
        file_name will be pulled out of the manifest.  Default is None.

    Returns
    -------
    tuple
        (mask, metadata).  NOTE: the cached branch returns whatever
        ``nrrd.read`` returns for the second element, while the computed
        branch returns None — existing callers may rely on this, so it is
        preserved.
    """
    file_name = self.get_cache_path(file_name, self.STRUCTURE_MASK_KEY,
                                    structure_id)

    if os.path.exists(file_name):
        return nrrd.read(file_name)
    else:
        # build the mask from the descendant structures of structure_id
        ont = self.get_ontology()
        structure_ids = ont.get_descendant_ids([structure_id])
        annotation, _ = self.get_annotation_volume(annotation_file_name)
        mask = self.make_structure_mask(structure_ids, annotation)

        if self.cache:
            # fixed: safe_make_parent_dirs tolerates bare file names,
            # unlike safe_mkdir(os.path.dirname(file_name)) which would
            # receive '' in that case
            Manifest.safe_make_parent_dirs(file_name)
            nrrd.write(file_name, mask)

        return mask, None
def get_deformation_field(self, section_data_set_id, header_path=None,
                          voxel_path=None):
    ''' Extract the local alignment parameters for this dataset. This a 3D
    vector image (3 components) describing a deformable local mapping from
    CCF voxels to this section data set's affine-aligned image stack.

    Parameters
    ----------
        section_data_set_id : int
            Download the deformation field for this data set
        header_path : str, optional
            If supplied, the deformation field header will be downloaded
            to this path.
        voxel_path : str, optiona
            If supplied, the deformation field voxels will be downloaded
            to this path.

    Returns
    -------
        numpy.ndarray :
            3D X 3 component vector array (origin 0, 0, 0; 25-micron
            isometric resolution) defining a deformable transformation
            from CCF-space to affine-transformed image space.
    '''
    # warn (don't fail) when this cache's resolution lacks a field
    if self.resolution not in self.DFMFLD_RESOLUTIONS:
        warnings.warn(
            'deformation fields are only available at {} isometric resolutions, but this is a '\
            '{}-micron cache'.format(self.DFMFLD_RESOLUTIONS, self.resolution)
        )

    header_path = self.get_cache_path(
        header_path, self.DEFORMATION_FIELD_HEADER_KEY, section_data_set_id)
    voxel_path = self.get_cache_path(
        voxel_path, self.DEFORMATION_FIELD_VOXEL_KEY, section_data_set_id)

    both_cached = os.path.exists(header_path) and os.path.exists(voxel_path)
    if not both_cached:
        # header and voxels are fetched together; the header references
        # the voxel file
        for path in (header_path, voxel_path):
            Manifest.safe_make_parent_dirs(path)
        self.api.download_deformation_field(
            section_data_set_id,
            header_path=header_path,
            voxel_path=voxel_path
        )

    # TODO the str call here is only necessary in 2.7
    image = sitk.ReadImage(str(header_path))
    return sitk.GetArrayFromImage(image)
def cache_data(self, neuronal_model_id, working_directory=None):
    '''Download an experiment's well-known files and store them locally.

    Queries the Api RMA for the well-known files associated with a
    neuronal model, downloads each one, and writes a manifest.json
    describing the result.

    Parameters
    ----------
    neuronal_model_id : int or string representation
        found in the neuronal_model table in the api

    working_directory : string
        Absolute path name where the downloaded well-known files will be
        stored.
    '''
    if working_directory is None:
        working_directory = self.default_working_directory

    wkf_ids = self.get_well_known_file_ids(neuronal_model_id)

    if not wkf_ids or not any(list(wkf_ids.values())):
        raise Exception("No data found for neuronal model id %d" %
                        (neuronal_model_id))

    # ensure the output tree exists before downloading anything
    Manifest.safe_mkdir(working_directory)
    for subdir in ('work', 'modfiles'):
        Manifest.safe_mkdir(os.path.join(working_directory, subdir))

    for category, files in wkf_ids.items():
        # stimulus downloads are optional and skipped unless requested
        if category == 'stimulus' and not self.cache_stimulus:
            continue
        for wkf_id, filename in files.items():
            url = self.construct_well_known_file_download_url(wkf_id)
            destination = os.path.join(working_directory, filename)
            self.retrieve_file_over_http(url, destination)

    fit_path = list(self.ids['fit'].values())[0]
    stimulus_filename = list(self.ids['stimulus'].values())[0]
    swc_morphology_path = list(self.ids['morphology'].values())[0]
    if 'marker' in self.ids:
        marker_path = list(self.ids['marker'].values())[0]
    else:
        marker_path = ''
    sweeps = sorted(self.sweeps)

    self.create_manifest(fit_path, self.model_type, stimulus_filename,
                         swc_morphology_path, marker_path, sweeps)

    manifest_path = os.path.join(working_directory, 'manifest.json')
    with open(manifest_path, 'w') as f:
        json.dump(self.manifest, f, indent=2)
def test_get_grid_storage_directories(storage_dirs, query_result, fn_temp_dir):
    """Check _get_grid_storage_directories filters on grid/ subdirectories."""

    def patched_query():
        # route LIMS queries to the canned fixture result
        return mock.patch('allensdk.internal.core.lims_utilities.query',
                          new=lambda a: query_result)

    # ------------------------------------------------------------------------
    # test dirs only have grid/ subdirectory: nothing exists yet -> empty
    with patched_query():
        obtained = _get_grid_storage_directories(
            GridDataApiPrerelease.GRID_DATA_DIRECTORY)

    assert not obtained

    # ------------------------------------------------------------------------
    # test returns storage_dirs once each grid/ path exists
    for base in storage_dirs.values():
        Manifest.safe_make_parent_dirs(os.path.join(base, 'grid'))

    with patched_query():
        obtained = _get_grid_storage_directories(
            GridDataApiPrerelease.GRID_DATA_DIRECTORY)

    for key, path in obtained:
        assert storage_dirs[key] == path
def get_session_data(self, session_id: int, filter_by_validity: bool = True, **unit_filter_kwargs):
    """ Obtain an EcephysSession object containing detailed data for a single session """
    path = self.get_cache_path(None, self.SESSION_NWB_KEY, session_id, session_id)

    def read(_path):
        # build the NWB-backed api lazily, once the file is local
        api = self._build_nwb_api_for_session(
            _path, session_id, filter_by_validity, **unit_filter_kwargs)
        return EcephysSession(api=api, test=True)

    Manifest.safe_make_parent_dirs(path)

    fetch = partial(self.s3fs.get, self._get_s3_path(path), path)
    return one_file_call_caching(path,
                                 fetch,
                                 lambda *a, **k: None,
                                 read,
                                 num_tries=self.fetch_tries)
def get_structure_mask(self, structure_id, file_name=None, annotation_file_name=None):
    """ Read a 3D numpy array shaped like the annotation volume with
    non-zero values wherever a voxel belongs to the given structure
    (substructures included).

    Parameters
    ----------
    structure_id: int
        ID of a structure.

    file_name: string
        File name to store the structure mask.  If it already exists,
        it will be read from this file.  If file_name is None, the
        file_name will be pulled out of the manifest.  Default is None.

    annotation_file_name: string
        File name to store the annotation volume.  If it already exists,
        it will be read from this file.  If file_name is None, the
        file_name will be pulled out of the manifest.  Default is None.
    """
    file_name = self.get_cache_path(
        file_name, self.STRUCTURE_MASK_KEY, structure_id)

    # cached copy: nrrd.read returns a (data, header) pair
    if os.path.exists(file_name):
        return nrrd.read(file_name)

    structure_ids = self.get_ontology().get_descendant_ids([structure_id])
    annotation, _ = self.get_annotation_volume(annotation_file_name)
    mask = self.make_structure_mask(structure_ids, annotation)

    if self.cache:
        Manifest.safe_make_parent_dirs(file_name)
        nrrd.write(file_name, mask)

    return mask, None
def cache_data(self, neuronal_model_id, working_directory=None):
    '''Take an experiment id, query the Api RMA to get well-known-files,
    download the files, and store them in the working directory.

    Parameters
    ----------
    neuronal_model_id : int or string representation
        found in the neuronal_model table in the api

    working_directory : string
        Absolute path name where the downloaded well-known files will be
        stored.  Defaults to self.default_working_directory.
    '''
    if working_directory is None:
        working_directory = self.default_working_directory

    well_known_file_id_dict = self.get_well_known_file_ids(neuronal_model_id)

    if not well_known_file_id_dict or \
            (not any(well_known_file_id_dict.values())):
        raise Exception("No data found for neuronal model id %d" %
                        (neuronal_model_id))

    Manifest.safe_mkdir(working_directory)

    work_dir = os.path.join(working_directory, 'work')
    Manifest.safe_mkdir(work_dir)

    modfile_dir = os.path.join(working_directory, 'modfiles')
    Manifest.safe_mkdir(modfile_dir)

    for key, id_dict in well_known_file_id_dict.items():
        if (not self.cache_stimulus) and (key == 'stimulus'):
            continue
        for well_known_id, filename in id_dict.items():
            well_known_file_url = \
                self.construct_well_known_file_download_url(well_known_id)
            cached_file_path = os.path.join(working_directory, filename)
            self.retrieve_file_over_http(well_known_file_url,
                                         cached_file_path)

    # dict.values() is a view on Python 3 and does not support indexing;
    # wrap in list() before taking the first element (matches the other
    # cache_data implementation in this file).
    fit_path = list(self.ids['fit'].values())[0]
    stimulus_filename = list(self.ids['stimulus'].values())[0]
    swc_morphology_path = list(self.ids['morphology'].values())[0]
    marker_path = \
        list(self.ids['marker'].values())[0] if 'marker' in self.ids else ''
    sweeps = sorted(self.sweeps)

    self.create_manifest(fit_path, self.model_type, stimulus_filename,
                         swc_morphology_path, marker_path, sweeps)

    manifest_path = os.path.join(working_directory, 'manifest.json')
    # json.dump emits str, so the file must be opened in text mode ('w');
    # the previous 'wb' + json.dumps write fails on Python 3.
    with open(manifest_path, 'w') as f:
        json.dump(self.manifest, f, indent=2)
def safe_mkdir_root_dir():
    # The filesystem root always exists; safe_mkdir must tolerate an
    # already-existing directory without raising.
    root = os.path.abspath(os.sep)
    Manifest.safe_mkdir(root)  # should not error
def cacher(fn, *args, **kwargs):
    '''make an rma query, save it and return the dataframe.

    Parameters
    ----------
    fn : function reference
        makes the actual query using kwargs.
    path : string
        where to save the data
    strategy : string or None, optional
        'create' always generates the data,
        'file' loads from disk,
        'lazy' queries the server if no file exists,
        None generates the data and bypasses all caching behavior
    pre : function
        df|json->df|json, takes one data argument and returns
        filtered version, None for pass-through
    post : function
        df|json->?, takes one data argument and returns Object
    reader : function, optional
        path -> data, default NOP
    writer : function, optional
        path, data -> None, default NOP
    kwargs : objects
        passed through to the query function

    Returns
    -------
    Object or None
        data type depends on fn, reader and/or post methods.
    '''
    path = kwargs.pop('path', None)
    strategy = kwargs.pop('strategy', None)
    pre = kwargs.pop('pre', lambda d: d)
    post = kwargs.pop('post', None)
    reader = kwargs.pop('reader', None)
    writer = kwargs.pop('writer', None)

    if strategy is None:
        # caching only makes sense when there is somewhere to write to
        strategy = 'lazy' if (writer or path) else 'pass_through'

    if strategy not in ('lazy', 'pass_through', 'file', 'create'):
        raise ValueError("Unknown query strategy: {}.".format(strategy))

    if strategy == 'lazy':
        strategy = 'file' if os.path.exists(path) else 'create'

    # Stays None for strategy == 'file' with no reader (a NOP read).
    # Previously that case was handled by catching a NameError from an
    # unbound local with a bare except; an explicit default is clearer.
    data = None

    if strategy == 'pass_through':
        data = fn(*args, **kwargs)
    elif strategy == 'create':
        Manifest.safe_make_parent_dirs(path)

        if writer:
            data = fn(*args, **kwargs)
            data = pre(data)
            writer(path, data)
        else:
            data = fn(*args, **kwargs)

    if reader:
        data = reader(path)

    # Note: don't provide post if fn or reader doesn't return data
    if post:
        data = post(data)

    return data
class Description(object):
    """Accumulates configuration data, keyed by section, plus a Manifest
    built from any 'manifest' sections encountered in the input."""

    _log = logging.getLogger(__name__)

    def __init__(self):
        # section name -> list of configuration entries
        self.data = {}
        # raw 'manifest' sections stripped from incoming data
        self.reserved_data = []
        self.manifest = Manifest()

    def update_data(self, data, section=None):
        '''Merge configuration data possibly from multiple files.

        Parameters
        ----------
        data : dict
            Configuration structure to add.
        section : string, optional
            What configuration section to read it into
            if the file does not specify.
        '''
        if section is None:
            # top-level dict of sections: merge each entry list
            for (section, entries) in data.items():
                if section not in self.data:
                    self.data[section] = entries
                else:
                    self.data[section].extend(entries)
        else:
            if section not in self.data:
                self.data[section] = []
            self.data[section].append(data)

    def is_empty(self):
        '''Check if anything is in the object.

        Returns
        -------
        boolean
            true if self.data is missing or empty
        '''
        return not self.data

    def unpack(self, data, section=None):
        '''Read the manifest and other stand-alone configuration structure,
        or insert a configuration object into a section of an existing
        configuration.

        Parameters
        ----------
        data : dict
            A configuration object including top level sections,
            or an configuration object to be placed within a section.
        section : string, optional.
            If this is present, place data within an existing section array.
        '''
        if section is None:
            self.unpack_manifest(data)
            self.update_data(data)
        else:
            self.update_data(data, section)

    def unpack_manifest(self, data):
        '''Pull the manifest configuration section into a separate place.

        Parameters
        ----------
        data : dict
            A configuration structure that still has a manifest section.
        '''
        data_manifest = data.pop("manifest", {})
        reserved_data = {"manifest": data_manifest}
        self.reserved_data.append(reserved_data)
        self.manifest.load_config(data_manifest)

    def fix_unary_sections(self, section_names=None):
        ''' Wrap section contents that don't have the proper
        array surrounding them in an array.

        Parameters
        ----------
        section_names : list of strings, optional
            Keys of sections that might not be in array form.
        '''
        if section_names is None:
            section_names = []

        for section in section_names:
            if section in self.data:
                # isinstance also catches dict subclasses; Logger.warn is
                # a deprecated alias of Logger.warning.
                if isinstance(self.data[section], dict):
                    self.data[section] = [self.data[section]]
                    Description._log.warning(
                        "wrapped description section %s in an array." %
                        (section))
def __init__(self):
    # start with an empty configuration: no sections, no reserved
    # manifest data, and a fresh Manifest
    self.manifest = Manifest()
    self.reserved_data = []
    self.data = {}
def get_experiment_structure_unionizes(
    self,
    experiment_id,
    file_name=None,
    is_injection=None,
    structure_ids=None,
    include_descendants=False,
    hemisphere_ids=None,
):
    """
    Retrieve the structure unionize data for a specific experiment.
    Filter by structure, injection status, and hemisphere.

    Parameters
    ----------

    experiment_id: int
        ID of the experiment of interest.  Corresponds to
        section_data_set_id in the API.

    file_name: string
        File name to save/read the experiments list.
        If file_name is None, the file_name will be pulled out of the
        manifest.  If caching is disabled, no file will be saved.
        Default is None.

    is_injection: boolean
        If True, only return unionize records that disregard
        non-injection pixels.  If False, only return unionize records
        that disregard injection pixels.  If None, return all records.
        Default None.

    structure_ids: list
        Only return unionize records for a specific set of structures.
        If None, return all records. Default None.

    include_descendants: boolean
        Include all descendant records for specified structures. Default
        False.

    hemisphere_ids: list
        Only return unionize records that disregard pixels outside of
        a hemisphere. or set of hemispheres. Left = 1, Right = 2,
        Both = 3.  If None, include all records [1, 2, 3]. Default None.
    """
    file_name = self.get_cache_path(file_name,
                                    self.STRUCTURE_UNIONIZES_KEY,
                                    experiment_id)

    if os.path.exists(file_name):
        # pd.DataFrame.from_csv was removed in pandas 1.0; read_csv with
        # index_col=0 is the documented replacement for reading a frame
        # written with DataFrame.to_csv.
        unionizes = pd.read_csv(file_name, index_col=0)
    else:
        unionizes = self.api.get_structure_unionizes([experiment_id])
        unionizes = pd.DataFrame(unionizes)

        # rename section_data_set_id column to experiment_id
        unionizes.columns = ["experiment_id"
                             if c == "section_data_set_id" else c
                             for c in unionizes.columns]

        if self.cache:
            Manifest.safe_make_parent_dirs(file_name)
            unionizes.to_csv(file_name)

    return self.filter_structure_unionizes(
        unionizes,
        is_injection,
        structure_ids,
        include_descendants,
        hemisphere_ids
    )