def ndarray(argument):
    """
    This dtype may be used to indicate numpy ndarrays as well as h5py arrays or omsi_dependencies

    :param argument: The argument to be parsed to ndarray

    :return: The converted ndarray
    """
    from omsi.dataformat.omsi_file.analysis import omsi_file_analysis
    from omsi.dataformat.omsi_file.msidata import omsi_file_msidata
    from omsi.dataformat.omsi_file.common import omsi_file_common
    if isinstance(argument, basestring):
        try:
            return np.asarray(ast.literal_eval(argument))
        except (ValueError, SyntaxError):
            # The string is not a Python literal; try to resolve it as a path to an OpenMSI object
            omsi_out_object = omsi_file_common.get_omsi_object(h5py_object=argument)
            if omsi_out_object is not None:
                return omsi_out_object
            else:
                raise ValueError('String could not be converted to a valid ndarray. This may be ' +
                                 'due to, e.g., a syntax error, or the file may not exist')
    elif isinstance(argument, (dependency_dict, h5py.Dataset, h5py.Group,
                               omsi_file_analysis, omsi_file_msidata)):
        return argument
    elif argument is None:
        return None
    return np.asarray(argument)
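# Usage sketch for the ndarray dtype above: a minimal, self-contained
# illustration of the string branch (assumes only numpy and ast; the literal
# below is an arbitrary example). Strings holding Python literals are parsed
# with ast.literal_eval and converted to numpy arrays; strings that fail to
# parse are instead resolved as paths to OpenMSI objects.
import ast
import numpy as np

def _parse_literal_to_array(argument):
    """Illustrative stand-in for the literal-parsing branch of ndarray(argument)."""
    return np.asarray(ast.literal_eval(argument))

assert _parse_literal_to_array('[[1, 2], [3, 4]]').shape == (2, 2)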
def get_method_info(self, check_parent=True):
    """
    Get the omsi_file_methods object with the method information.

    :param check_parent: If no method group is available for this dataset, should we check whether
        the parent object (i.e., the experiment group containing the dataset) has information about
        the method? (default=True)

    :returns: omsi_file_methods object for the requested method info. The function returns None
        in case no method information was found for the experiment.
    """
    if self.method_parent is not None:
        try:
            return omsi_file_methods(self.method_parent[unicode(omsi_format_methods.methods_groupname)])
        except KeyError:
            # Fall back to the old group name used by older files
            try:
                return omsi_file_methods(self.method_parent[unicode(omsi_format_methods.methods_old_groupname)])
            except KeyError:
                if check_parent:
                    return omsi_file_common.get_omsi_object(self.method_parent.parent).get_method_info()
        except:
            pass
    return None
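# Hedged usage sketch for get_method_info: 'example.h5' is a placeholder file,
# and the omsi_file/get_experiment/get_msidata calls are assumptions based on
# the OpenMSI file API used elsewhere in this module.
import os

if os.path.exists('example.h5'):
    from omsi.dataformat.omsi_file.main_file import omsi_file
    input_file = omsi_file('example.h5', mode='r')
    msi_dataset = input_file.get_experiment(0).get_msidata(0)
    # Fall back to the parent experiment if the dataset has no method group
    method_info = msi_dataset.get_method_info(check_parent=True)
    print(method_info)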
def get_metadata_collections(self, omsi_object=None, name=None):
    """
    Get all metadata_collections defined for the given OpenMSI file API object or h5py.Group.

    :param omsi_object: The omsi file API object or h5py.Group object that we should check.
        If set to None (default) then self.metadata_parent will be used.
    :param name: If name is specified, then only retrieve collections with the given name.

    :returns: List of omsi_file_metadata_collection objects for the requested group. The function
        returns None in case the h5py.Group for the omsi_object could not be determined.
    """
    metadata_collections = []
    use_omsi_object = self.metadata_parent if omsi_object is None else omsi_object
    # Resolve the h5py.Group against which we should look for metadata collections
    h5py_group = None
    if isinstance(use_omsi_object, omsi_file_common):
        h5py_group = use_omsi_object.managed_group
    elif isinstance(use_omsi_object, h5py.Group):
        h5py_group = use_omsi_object
    elif isinstance(use_omsi_object, h5py.Dataset):
        h5py_group = use_omsi_object.parent
    elif isinstance(use_omsi_object, h5py.File):
        h5py_group = use_omsi_object['/']
    if h5py_group is not None:
        for h5py_name, h5py_value in h5py_group.iteritems():
            curr_omsi_object = omsi_file_common.get_omsi_object(h5py_value)
            if isinstance(curr_omsi_object, omsi_file_metadata_collection):
                if name is None or h5py_name == name:
                    metadata_collections.append(curr_omsi_object)
    else:
        return None
    return metadata_collections
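# Standalone illustration of the group-resolution rule used above, with plain
# h5py (in-memory file; group and dataset names are placeholders): a Dataset
# resolves to its parent group and a File to its root group.
import h5py
import numpy as np

with h5py.File('in_memory.h5', mode='w', driver='core', backing_store=False) as f:
    dset = f.create_group('collection_group').create_dataset('values', data=np.arange(3))
    assert dset.parent.name == '/collection_group'  # h5py.Dataset -> parent group
    assert f['/'].name == '/'                       # h5py.File -> root group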
def parse_cl_arguments(self):
    """
    The function assumes that the command-line parser has been set up using initialize_argument_parser(..).

    This function parses all arguments that are specific to the command-line parser itself. Analysis
    arguments are added and parsed later by the add_and_parse_analysis_arguments(...) function.
    The reason for this is two-fold: i) to separate the parsing of analysis arguments and arguments
    of the command-line driver and ii) if the same HDF5 file is used as input and output target, then
    we need to open it first here in append mode before it gets opened in read mode later by the arguments.

    *Side effects:* The function sets ``self.output_target`` and ``self.profile_analysis``
    """
    # Parse the arguments and convert them to a dict using vars
    parsed_arguments = vars(self.parser.parse_known_args()[0])
    # Clean up the arguments to remove default arguments of the driver class
    # before we hand the arguments to the analysis class
    if self.analysis_class_arg_name in parsed_arguments:
        parsed_arguments.pop(self.analysis_class_arg_name)
    # Process the --save argument to determine where we should save the output
    if self.output_save_arg_name in parsed_arguments and mpi_helper.get_rank() == self.mpi_root:
        # Determine the filename and experiment group from the path
        self.output_target = parsed_arguments.pop(self.output_save_arg_name)
        if self.output_target is not None:
            output_filename, output_object_path = omsi_file_common.parse_path_string(self.output_target)
            # Create the output file
            if output_filename is None:
                raise ValueError("ERROR: Invalid save parameter specification " + self.output_target)
            elif os.path.exists(output_filename) and not os.path.isfile(output_filename):
                raise ValueError("ERROR: Save parameter does not specify a file.")
            if not os.path.exists(output_filename):
                out_file = omsi_file(output_filename, mode='a')
                self.output_target = out_file.create_experiment()
                self.__output_target_self = output_filename
            else:
                out_file = omsi_file(output_filename, mode='r+')
                if output_object_path is not None:
                    self.output_target = omsi_file_common.get_omsi_object(out_file[output_object_path])
                else:
                    if out_file.get_num_experiments() > 0:
                        self.output_target = out_file.get_experiment(0)
                    else:
                        self.output_target = out_file.create_experiment()
    else:
        self.output_target = parsed_arguments.pop(self.output_save_arg_name)
    # The --loglevel argument
    if self.log_level_arg_name in parsed_arguments:
        user_log_level = parsed_arguments.pop(self.log_level_arg_name)
        if user_log_level in log_helper.log_levels.keys():
            log_helper.set_log_level(level=log_helper.log_levels[user_log_level])
        else:
            log_helper.error(module_name=__name__, message="Invalid log level specified")
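# Minimal sketch of the parse_known_args pattern used above: the driver parses
# only its own flags and leaves unknown (analysis) arguments for a later pass.
# The flag names mirror the docstring; '--peak_height' is a hypothetical
# analysis argument used purely for illustration.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--save', default=None)
parser.add_argument('--loglevel', default='INFO')
parsed_arguments = vars(parser.parse_known_args(['--save', 'out.h5', '--peak_height', '10'])[0])
assert parsed_arguments['save'] == 'out.h5'  # driver argument consumed here
# '--peak_height 10' remains unparsed for add_and_parse_analysis_arguments(...)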
def get_instrument_info(self, check_parent=True):
    """
    Get the HDF5 group object with the instrument information.

    :param check_parent: If no instrument group is available for this dataset, should we check whether
        the parent object (i.e., the experiment group containing the dataset) has information about
        the instrument? (default=True)

    :returns: omsi_file_instrument object for the requested instrument info. The function returns
        None in case no instrument information was found for the experiment.
    """
    try:
        return omsi_file_instrument(self.instrument_parent[unicode(omsi_format_instrument.instrument_groupname)])
    except KeyError:
        # Check whether the parent group has information about the instrument
        if check_parent:
            return omsi_file_common.get_omsi_object(self.instrument_parent.parent).get_instrument_info()
    except:
        pass
    return None
def __write_omsi_analysis_data__(cls, data_group, ana_data):
    """
    Private helper function used to write the data defined by an analysis_data object to HDF5.

    :param data_group: The h5py data group to which the data should be written.
    :param ana_data: The analysis_data object with the description of the data to be written.
    :type ana_data: omsi.analysis.analysis_data
    """
    from omsi.datastructures.analysis_data import analysis_data, data_dtypes
    curr_dtype = ana_data['dtype']
    try:
        if curr_dtype == data_dtypes.get_dtypes()['ndarray']:
            curr_dtype = ana_data['data'].dtype
    except TypeError:
        pass
    try:
        if curr_dtype == data_dtypes.get_dtypes()['bool']:
            curr_dtype = bool
    except TypeError:
        pass
    try:
        if curr_dtype == data_dtypes.get_dtypes()['str']:
            curr_dtype = omsi_format_common.str_type
    except TypeError:
        pass
    # Create a link in HDF5 to an existing dataset within the file
    if isinstance(ana_data, analysis_data) and isinstance(ana_data['dtype'], int):
        if curr_dtype == ana_data.ana_hdf5link:
            linkobject = data_group.file.get(ana_data['data'])
            data_group[ana_data['name']] = linkobject
            omsiobj = omsi_file_common.get_omsi_object(linkobject)
            try:
                # Check if we already have a type attribute
                _ = data_group[ana_data['name']].attrs[omsi_format_common.type_attribute]
            except:
                # Generate the type attribute from scratch
                if omsiobj is not None:
                    omsiobjtype = omsiobj.__class__.__name__
                else:
                    omsiobjtype = ""
                data_group[ana_data['name']].attrs[omsi_format_common.type_attribute] = omsiobjtype
    # Create a new string-type dataset
    elif (curr_dtype == omsi_format_common.str_type) or (curr_dtype == h5py.special_dtype(vlen=str)):
        tempdata = data_group.require_dataset(name=unicode(ana_data['name']),
                                              shape=(1,),
                                              dtype=omsi_format_common.str_type)
        if len(unicode(ana_data['data'])) > 0:
            if omsi_format_common.str_type_unicode:
                tempdata[0] = unicode(ana_data['data'])
            else:
                tempdata[0] = str(ana_data['data'])
        else:
            warnings.warn("WARNING: " + ana_data['name'] +
                          " dataset generated but not written. The given dataset was empty.")
    # Create a new dataset to store the current numpy-type dataset
    elif 'numpy' in str(type(ana_data['data'])):
        # Decide whether we want to enable chunking for the current analysis dataset
        chunks = None
        if ana_data['data'].size > 1000:
            chunks = True
        # Write the current analysis dataset
        if ana_data['data'].dtype.type in [np.string_, np.unicode_]:
            tempdata = data_group.require_dataset(name=ana_data['name'],
                                                  shape=ana_data['data'].shape,
                                                  dtype=omsi_format_common.str_type,
                                                  chunks=chunks)
        else:
            tempdata = data_group.require_dataset(name=ana_data['name'],
                                                  shape=ana_data['data'].shape,
                                                  dtype=ana_data['data'].dtype,
                                                  chunks=chunks)
        if ana_data['data'].size > 0:
            try:
                tempdata[:] = ana_data['data']
            except TypeError:
                tempdata[()] = ana_data['data']
        else:
            warnings.warn("WARNING: " + ana_data['name'] +
                          " dataset generated but not written. The given dataset was empty.")
    # Unknown dtype. Attempt to convert the dataset to numpy and write it to file.
    else:
        # Safely convert scalars to numpy but warn in case we see something else
        default_dtypes = data_dtypes.get_dtypes()
        if ana_data['dtype'] not in default_dtypes.keys() and ana_data['dtype'] not in default_dtypes.values():
            warnings.warn("WARNING: " + str(ana_data['name']) +
                          ": The data specified by the analysis object is not " +
                          "in numpy format. Attempting to convert the data to numpy")
        try:
            dat = np.asarray(ana_data['data'])
            if len(dat.shape) == 0:
                dat = dat[np.newaxis]  # promote scalar to a 1-element array
            try:
                tempdata = data_group.require_dataset(name=ana_data['name'],
                                                      shape=dat.shape,
                                                      dtype=str(dat.dtype))
            except TypeError:
                # Some unicode types are not well understood by h5py
                if 'U' in str(dat.dtype) or 'S' in str(dat.dtype):
                    tempdata = data_group.require_dataset(name=ana_data['name'],
                                                          shape=dat.shape,
                                                          dtype=omsi_format_common.str_type)
                else:
                    raise
            if dat.size > 0:
                try:
                    tempdata[:] = dat
                except TypeError:
                    tempdata[()] = dat
            else:
                warnings.warn(ana_data['name'] +
                              " dataset generated but not written. The given dataset was empty.")
        except:
            warnings.warn("ERROR: " + str(ana_data['name']) +
                          ": The data specified by the analysis could not be " +
                          "converted to numpy for writing to HDF5")
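# Standalone sketch of the numpy fallback path above, using plain h5py (file
# and dataset names are placeholders). Scalars are promoted to 1-element
# arrays before require_dataset, mirroring the dat[np.newaxis] step.
import h5py
import numpy as np

with h5py.File('sketch.h5', mode='w', driver='core', backing_store=False) as f:
    dat = np.asarray(42)          # scalar input with shape ()
    if len(dat.shape) == 0:
        dat = dat[np.newaxis]     # promote to shape (1,)
    tempdata = f.require_dataset(name='answer', shape=dat.shape, dtype=str(dat.dtype))
    tempdata[:] = dat
    assert f['answer'][0] == 42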
def parse_cl_arguments(self):
    """
    The function assumes that the command-line parser has been set up using initialize_argument_parser(..).

    This function parses all arguments that are specific to the command-line parser itself. Analysis
    workflow arguments are added and parsed later by the add_and_parse_workflow_arguments(...) function.
    The reason for this is two-fold: i) to separate the parsing of analysis arguments and arguments of
    the command-line driver and ii) if the same HDF5 file is used as input and output target, then we
    need to open it first here in append mode before it gets opened in read mode later by the arguments.

    *Side effects:* The function sets:

        - ``self.output_target``
        - ``self.profile_analyses``

    """
    # Parse the arguments and convert them to a dict using vars
    parsed_arguments = vars(self.parser.parse_known_args()[0])
    # Process the --save argument to determine where we should save the output
    if self.output_save_arg_name in parsed_arguments and mpi_helper.get_rank() == self.mpi_root:
        # Determine the filename and experiment group from the path
        self.output_target = parsed_arguments.pop(self.output_save_arg_name)
        if self.output_target is not None:
            output_filename, output_object_path = omsi_file_common.parse_path_string(self.output_target)
            # Create the output file
            if output_filename is None:
                raise ValueError("ERROR: Invalid save parameter specification " + self.output_target)
            elif os.path.exists(output_filename) and not os.path.isfile(output_filename):
                raise ValueError("ERROR: Save parameter does not specify a file.")
            if not os.path.exists(output_filename):
                out_file = omsi_file(output_filename, mode='a')
                self.output_target = out_file.create_experiment()
                self.__output_target_self = output_filename
            else:
                out_file = omsi_file(output_filename, mode='r+')
                if output_object_path is not None:
                    self.output_target = omsi_file_common.get_omsi_object(out_file[output_object_path])
                else:
                    if out_file.get_num_experiments() > 0:
                        self.output_target = out_file.get_experiment(0)
                    else:
                        self.output_target = out_file.create_experiment()
    else:
        self.output_target = parsed_arguments.pop(self.output_save_arg_name)
    # Process the --profile profiling argument
    if self.profile_arg_name in parsed_arguments:
        self.profile_analyses = parsed_arguments.pop(self.profile_arg_name)
    # Process the --memprofile argument
    if self.profile_mem_arg_name in parsed_arguments:
        self.profile_analyses_mem = parsed_arguments.pop(self.profile_mem_arg_name)
    # The --loglevel argument
    if self.log_level_arg_name in parsed_arguments:
        self.user_log_level = parsed_arguments.pop(self.log_level_arg_name)
        if self.user_log_level in log_helper.log_levels.keys():
            log_helper.set_log_level(level=log_helper.log_levels[self.user_log_level])
        else:
            self.user_log_level = None
            log_helper.error(module_name=__name__, message="Invalid log level specified")
    # The --script arguments
    if self.script_arg_name in parsed_arguments:
        self.script_files = parsed_arguments.pop(self.script_arg_name)
        if self.workflow_executor is None:
            self.create_workflow_executor_object()
        else:
            self.workflow_executor.add_analysis_from_scripts(script_files=self.script_files)
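# Minimal sketch of the --loglevel validation step above, using the standard
# logging module in place of log_helper (the level mapping below is an
# assumption modeled on log_helper.log_levels).
import logging

log_levels = {'DEBUG': logging.DEBUG, 'INFO': logging.INFO,
              'WARNING': logging.WARNING, 'ERROR': logging.ERROR}
user_log_level = 'DEBUG'
if user_log_level in log_levels.keys():
    logging.getLogger(__name__).setLevel(log_levels[user_log_level])
else:
    user_log_level = None  # reject invalid levels, as the driver above does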
def __setitem__(self, key, value):
    """
    Overwrite the __setitem__ function inherited from dict to ensure that only
    elements with a specific set of keys can be modified.
    """
    from omsi.analysis.base import analysis_base
    from omsi.dataformat.file_reader_base import file_reader_base
    if key in self:
        if key == "omsi_object":
            if omsi_file_common.is_managed(value):
                dict.__setitem__(self, key, omsi_file_common.get_omsi_object(value))
            elif isinstance(value, h5py.Dataset) or isinstance(value, h5py.Group):
                parent = value.parent
                if omsi_file_common.is_managed(parent):
                    dict.__setitem__(self, 'omsi_object', omsi_file_common.get_omsi_object(parent))
                    dict.__setitem__(self, 'dataname', unicode(value.name.split('/')[-1]))
                else:
                    warnings.warn("The generated dependency does not point to a managed object.")
                    dict.__setitem__(self, 'omsi_object', omsi_file_common.get_omsi_object(value))
                dict.__setitem__(self, '_data', None)  # Any previously loaded data may be invalid (delete)
            elif isinstance(value, analysis_base):
                dict.__setitem__(self, 'omsi_object', value)
            else:
                raise ValueError(str(value) + " invalid omsi_object parameter for " +
                                 "dependency_dict without valid data dependency.")
        elif key == 'selection':
            if value is None or (isinstance(value, basestring) and len(value) == 0):
                new_value = None
            else:
                from omsi.shared.data_selection import selection_to_string
                new_value = unicode(selection_to_string(selection=value))
            dict.__setitem__(self, key, new_value)
            dict.__setitem__(self, '_data', None)  # Any previously loaded data may be invalid (delete)
        elif key == 'dataname':
            if not isinstance(value, basestring):
                raise ValueError('Dataname must be a string')
            dict.__setitem__(self, 'dataname', unicode(value))
            dict.__setitem__(self, '_data', None)  # Any previously loaded data may be invalid (delete)
        elif key == 'param_name':
            if not isinstance(value, basestring):
                raise ValueError('param_name must be a string')
            dict.__setitem__(self, 'param_name', unicode(value))
        elif key == 'link_name':
            if not isinstance(value, basestring):
                raise ValueError('link_name must be a string')
            dict.__setitem__(self, 'link_name', unicode(value))
        elif key == '_data':
            raise KeyError('_data key is managed by dependency_dict. Explicit definition of _data not permitted.')
        elif key == 'help':
            if isinstance(value, basestring):
                dict.__setitem__(self, 'help', unicode(value))
        elif key == 'dependency_type':
            if value in self.dependency_types.values():
                dict.__setitem__(self, 'dependency_type', value)
            else:
                raise ValueError('Unknown dependency type specified. Valid types are: ' +
                                 str(self.dependency_types))
        else:
            dict.__setitem__(self, key, value)
    else:
        raise KeyError("'" + str(key) + "' key not in default key set of dependency_dict")
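# Hedged usage sketch for the key whitelist enforced above. The import path
# and constructor defaults are assumptions; assigning an unknown key raises
# KeyError, while known keys such as 'dataname' are coerced to unicode.
try:
    from omsi.datastructures.dependency_data import dependency_dict
    dep = dependency_dict()
    dep['dataname'] = 'peak_cube'  # allowed key, stored as unicode
    dep['not_a_key'] = 1           # raises KeyError: not in the default key set
except (ImportError, KeyError) as err:
    print(err)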