Example No. 1
    def run(self):
        """
        Call this function to execute the task function in parallel.

        :return: Tuple with the following elements:

            1) List with the results from the local execution of the task_function. Each
               entry is the result from one return of the task_function. In the case of static
               execution, this is always a list of length 1.
            2) List of block_indexes. Each block_index is a tuple with the selection used to
               divide the data into sub-blocks. In the case of static decomposition we have
               a range slice object along the axes used for decomposition whereas in the
               case of dynamic scheduling we usually have single integer point selections
               for each task.

        """
        try:
            from omsi.shared.log import log_helper
        except ImportError:
            from pactolus.third_party.log import log_helper
        start_time = time.time()
        self.__data_collected = False
        if self.schedule == self.SCHEDULES['DYNAMIC']:
            result = self.__run_dynamic()
        elif self.schedule == self.SCHEDULES['STATIC_1D'] or self.schedule == self.SCHEDULES['STATIC']:
            result = self.__run_static_1D()
        else:
            log_helper.error(__name__, "Invalid scheduling scheme given: " + str(self.schedule))
            raise ValueError("Invalid scheduling scheme given: " + str(self.schedule))
        end_time = time.time()
        self.run_time = end_time - start_time
        return result
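A minimal, self-contained sketch of the schedule-dispatch and timing pattern used in run() above; the SCHEDULES mapping and the two runner callables are illustrative stand-ins, not the real omsi implementation.

import time

SCHEDULES = {'DYNAMIC': 'DYNAMIC', 'STATIC': 'STATIC', 'STATIC_1D': 'STATIC_1D'}

def run_with_timing(schedule, run_dynamic, run_static_1d):
    """Dispatch to the requested scheduler and record the wall-clock run time."""
    start_time = time.time()
    if schedule == SCHEDULES['DYNAMIC']:
        result = run_dynamic()
    elif schedule in (SCHEDULES['STATIC_1D'], SCHEDULES['STATIC']):
        result = run_static_1d()
    else:
        raise ValueError("Invalid scheduling scheme given: " + str(schedule))
    run_time = time.time() - start_time
    return result, run_time

results, seconds = run_with_timing('DYNAMIC', lambda: [42], lambda: [0])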
Example No. 2
    def remove_output_target(self):
        """
        This function is used to delete any output target files created by the
        command line driver. This is done in case an error occurred and
        we do not want to leave garbage files behind.

        *Side effects* The function modifies ``self.output_target``

        :return: Boolean indicating whether we successfully cleaned up the output

        """
        success = False
        if self.__output_target_self is not None:
            try:
                os.remove(self.__output_target_self)
                log_helper.info(
                    __name__, "Successfully removed output target: " +
                    unicode(self.__output_target_self))
                success = True
            except:
                log_helper.error(
                    __name__,
                    "Clean-up of output failed. File may be left on system: " +
                    unicode(self.__output_target_self))
        elif self.output_target is not None:
            log_helper.info(
                __name__,
                "Output target not removed because it was not created " +
                "by the analysis but potentially modified by it")
        else:
            success = True
        return success
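The cleanup logic above distills to the following self-contained sketch; remove_if_created is a hypothetical helper, not part of the omsi driver.

import os

def remove_if_created(path, created_by_us):
    """Remove a file only if this run created it; report success as a boolean."""
    if path is None:
        return True          # nothing to clean up
    if not created_by_us:
        return False         # leave files we merely modified in place
    try:
        os.remove(path)
        return True
    except OSError:
        return False         # clean-up failed; the file may be left on the system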
Example No. 3
    def __getitem__(self,
                    key):
        """Custom slicing. Return the value associated with the given key if it is one of our predefined keys.
           Otherwise, assume that the user wants to slice into the data associated with the dependency and
           return get_data()[key] instead. If get_data() cannot be resolved yet (i.e., it returns another
           dependency), then a new dependency_dict is returned that adds the selection.

           :param key: The key to be used for slicing

           :returns: Value. May return a dependency_dict if the selection refers to the data object and the
                dependency cannot be resolved yet.
        """
        if key in self.keys():
            return dict.__getitem__(self, key)
        else:
            data_ref = self.get_data()
            if isinstance(data_ref, dependency_dict):
                if self['selection'] is not None:
                    log_helper.error(__name__, "The current dependency already has a selection. Refinement of " +
                                     "existing selections is not yet supported. A new dependency with the full " +
                                     "current selection will be used instead.")
                copy_ref = self.copy()
                copy_ref['selection'] = key
                return copy_ref
            else:
                return self.get_data()[key]
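The fall-through slicing idea can be illustrated with a small self-contained stand-in (a hypothetical lazy_dict, not the real omsi dependency_dict):

import numpy as np

class lazy_dict(dict):
    """Hypothetical stand-in: predefined keys behave like a dict, anything else
    slices into the data returned by get_data()."""
    def __getitem__(self, key):
        if key in self.keys():
            return dict.__getitem__(self, key)
        return self.get_data()[key]

    def get_data(self):
        return np.arange(10)

d = lazy_dict(name='example')
print(d['name'])   # 'example'  (a predefined key)
print(d[2:5])      # [2 3 4]    (slices into the underlying data)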
Example No. 4
    def remove_output_target(self):
        """
        This function is used to delete any output target files created by the
        command line driver. This is done in case an error occurred and
        we do not want to leave garbage files behind.

        *Side effects* The function modifies ``self.output_target``

        :return: Boolean indicating whether we successfully cleaned up the output
        """
        success = False
        if self.__output_target_self is not None:
            try:
                os.remove(self.__output_target_self)
                log_helper.info(__name__, "Successfully removed output target: " + unicode(self.__output_target_self))
                success = True
            except:
                log_helper.error(__name__, "Clean-up of output failed. File may be left on system: "
                                 + unicode(self.__output_target_self))
        elif self.output_target is not None:
            log_helper.info(__name__, "Output target not removed because it was not created " +
                                      "by the analysis but potentially modified by it")
        else:
            success = True
        return success
Example No. 5
    def run(self):
        """
        Call this function to execute the task function in parallel.

        :return: Tuple with the following elements:

            1) List with the results from the local execution of the task_function. Each
               entry is the result from one return of the task_function. In the case of static
               execution, this is always a list of length 1.
            2) List of block_indexes. Each block_index is a tuple with the selection used to
               divide the data into sub-blocks. In the case of static decomposition we have
               a range slice object along the axes used for decomposition whereas in the
               case of dynamic scheduling we usually have single integer point selections
               for each task.

        """
        from omsi.shared.log import log_helper
        start_time = time.time()
        self.__data_collected = False
        if self.schedule == self.SCHEDULES['DYNAMIC']:
            result = self.__run_dynamic()
        elif self.schedule == self.SCHEDULES['STATIC_1D'] or self.schedule == self.SCHEDULES['STATIC']:
            result = self.__run_static_1D()
        else:
            log_helper.error(__name__, "Invalid scheduling scheme given: " + str(self.schedule))
            raise ValueError("Invalid scheduling scheme given: " + str(self.schedule))
        end_time = time.time()
        self.run_time = end_time - start_time
        return result
Example No. 6
    def parse_cl_arguments(self):
        """
        The function assumes that the command line parser has been set up using the initialize_argument_parser(...) function.

        This function parses all arguments that are specific to the command-line parser itself. Analysis
        arguments are added and parsed later by the add_and_parse_analysis_arguments(...) function.
        The reason for this is two-fold: i) to separate the parsing of analysis arguments and arguments of the
        command-line driver and ii) if the same HDF5 file is used as input and output target, then we need to
        open it first here in append mode before it gets opened in read mode later by the arguments.

        *Side effects:* The function sets ``self.output_target`` and ``self.profile_analysis``

        """
        # Parse the arguments and convert them to a dict using vars
        parsed_arguments = vars(self.parser.parse_known_args()[0])

        # Clean up the arguments to remove default arguments of the driver class
        # before we hand the arguments to the analysis class
        if self.analysis_class_arg_name in parsed_arguments:
            parsed_arguments.pop(self.analysis_class_arg_name)

        # Process the --save argument to determine where we should save the output
        if self.output_save_arg_name in parsed_arguments and mpi_helper.get_rank() == self.mpi_root:
            # Determine the filename and experiment group from the path
            self.output_target = parsed_arguments.pop(self.output_save_arg_name)
            if self.output_target is not None:
                output_filename, output_object_path = omsi_file_common.parse_path_string(self.output_target)
                # Create the output file
                if output_filename is None:
                    raise ValueError("ERROR: Invalid save parameter specification " + self.output_target)
                elif os.path.exists(output_filename) and not os.path.isfile(output_filename):
                    raise ValueError("ERROR: Save parameter not specify a file.")
                if not os.path.exists(output_filename):
                    out_file = omsi_file(output_filename, mode='a')
                    self.output_target = out_file.create_experiment()
                    self.__output_target_self = output_filename
                else:
                    out_file = omsi_file(output_filename, mode='r+')
                    if output_object_path is not None:
                        self.output_target = omsi_file_common.get_omsi_object(out_file[output_object_path])
                    else:
                        if out_file.get_num_experiments() > 0:
                            self.output_target = out_file.get_experiment(0)
                        else:
                            self.output_target = out_file.create_experiment()
        else:
            self.output_target = parsed_arguments.pop(self.output_save_arg_name)

        # The --loglevel argument
        if self.log_level_arg_name in parsed_arguments:
            user_log_level = parsed_arguments.pop(self.log_level_arg_name)
            if user_log_level in log_helper.log_levels.keys():
                log_helper.set_log_level(level=log_helper.log_levels[user_log_level])
            else:
                log_helper.error(module_name=__name__, message="Invalid log level specified")
Example No. 7
    def get_metadata(self, key=None):
        """
        Get dict with the full description of the metadata for the given key or all
        metadata if no key is given.

        :param key: The name of the metadata object to be retrieved. Default is None in
                    which case all metadata will be retrieved.

        :returns: `omsi.shared.metadata_data.metadata_value` object if a key is given
            or a `omsi.shared.metadata_data.metadata_dict` with all metadata
            if key is set to None.

        :raises: KeyError is raised in case that the specified key does not exist
        """
        descr_attr = omsi_format_metadata_collection.description_value_attribute
        unit_attr = omsi_format_metadata_collection.unit_value_attribute
        ontology_attr = omsi_format_metadata_collection.ontology_value_attribute
        isjson_attr = omsi_format_metadata_collection.is_json_dict_attribute
        if key is None:
            output_meta_dict = metadata_dict()
            for metadata_name, metadata_dataset in self.managed_group.iteritems():
                unit = None if unit_attr not in metadata_dataset.attrs else metadata_dataset.attrs[unit_attr]
                description = None if descr_attr not in metadata_dataset.attrs else metadata_dataset.attrs[descr_attr]
                ontology = None if ontology_attr not in metadata_dataset.attrs else \
                    json.loads(metadata_dataset.attrs[ontology_attr])
                md_value = metadata_dataset[:] if len(metadata_dataset.shape) > 0 else metadata_dataset[()]
                if isjson_attr in metadata_dataset.attrs:
                    try:
                        md_value = json.loads(md_value)
                    except:
                        log_helper.error(__name__, "Parsing of json metadata object failed for " + str(metadata_name))
                output_meta_dict[metadata_name] = metadata_value(
                    name=metadata_name,
                    value=md_value,
                    description=description,
                    unit=unit,
                    ontology=ontology)
            return output_meta_dict
        else:
            metadata_dataset = self.managed_group[key]
            unit = None if unit_attr not in metadata_dataset.attrs else metadata_dataset.attrs[unit_attr]
            description = None if descr_attr not in metadata_dataset.attrs else metadata_dataset.attrs[descr_attr]
            ontology = None if ontology_attr not in metadata_dataset.attrs else \
                json.loads(metadata_dataset.attrs[ontology_attr])
            return metadata_value(
                name=key,
                value=metadata_dataset[:],
                description=description,
                unit=unit,
                ontology=ontology)
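The attribute lookups above follow a simple pattern that can be reproduced with plain h5py; the file name and attribute names below are made up for illustration.

import json
import h5py
import numpy as np

with h5py.File('example_metadata.h5', 'w') as h5_file:
    dset = h5_file.create_dataset('temperature', data=np.array([21.5]))
    dset.attrs['unit'] = 'C'
    dset.attrs['ontology'] = json.dumps({'name': 'temperature'})

with h5py.File('example_metadata.h5', 'r') as h5_file:
    dset = h5_file['temperature']
    unit = None if 'unit' not in dset.attrs else dset.attrs['unit']
    ontology = None if 'ontology' not in dset.attrs else json.loads(dset.attrs['ontology'])
    value = dset[:] if len(dset.shape) > 0 else dset[()]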
Example No. 8
    def v_qslice(cls,
                 analysis_object,
                 z,
                 viewer_option=0):
        """Implement support for qslice URL requests for the viewer"""
        from omsi.shared.data_selection import selection_string_to_object
        if viewer_option == 0:
            dataset = analysis_object['peak_cube']
            try:
                z_select = selection_string_to_object(selection_string=z)
                data = dataset[:, :, z_select]
                return data
            except:
                log_helper.error(__name__, "Global peak selection failed. ")
                return None
        elif viewer_option >= 0:
            return super(omsi_findpeaks_global, cls).v_qslice(analysis_object, z, viewer_option-1)
        else:
            return None
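A hedged sketch of what converting a selection string and applying it to the peak cube could look like; the real omsi selection_string_to_object supports a much richer syntax than this hypothetical helper.

import numpy as np

def simple_selection_to_object(selection_string):
    """Hypothetical, simplified converter: '1:3' -> slice(1, 3), '2' -> 2."""
    if ':' in selection_string:
        start, stop = selection_string.split(':')
        return slice(int(start), int(stop))
    return int(selection_string)

peak_cube = np.random.rand(4, 5, 6)
print(peak_cube[:, :, simple_selection_to_object('1:3')].shape)   # (4, 5, 2)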
Example No. 9
    def get_data(self):
        """Get the data associated with the dependency.

           :returns: If a selection is applied and the dependency object supports
                     array data load (e.g., h5py.Dataset, omsi_file_msidata), then
                     the selected data will be loaded and returned as numpy array.
                     Otherwise the ['omsi_object'] is returned.
        """
        # Return preloaded data if available
        if self['_data'] is not None:
            return self['_data']
        # Check if we can access the data object
        else:
            # Retrieve the data object
            if self['dataname']:
                data_object = self['omsi_object'][self['dataname']]
                # Ensure that the dependency can actually be resolved. E.g., if the data the dependency points to
                # is not ready yet then we may get a dependency back that points to the same object as we do,
                # which in turn could result in an endless recursion
                if isinstance(data_object, dependency_dict):
                    if data_object['omsi_object'] is self['omsi_object']:
                        return self
            else:
                data_object = self['omsi_object']
            # Resolve any data selections
            try:
                if self['selection'] is None:
                    # data = data_object[:]
                    # self['_data'] = data_object
                    return data_object
                else:
                    from omsi.shared.data_selection import selection_string_to_object
                    current_selection = selection_string_to_object(self['selection'])
                    if current_selection is not None:
                        dict.__setitem__(self, '_data', data_object[current_selection])
                    else:
                        raise ValueError('Invalid selection string')
                return self['_data']
            except:
                import sys
                log_helper.error(__name__, "Application of data selection failed. " + str(sys.exc_info()))
                raise
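The except branch above is meant to log the failure and then re-raise; distilled into a self-contained sketch (print stands in for log_helper.error so it runs without omsi installed):

import sys

def apply_selection(data, selection):
    try:
        return data[selection]
    except Exception:
        # Log the failure for diagnosis, then propagate the original error.
        print("Application of data selection failed. " + str(sys.exc_info()))
        raise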
Example No. 10
    def create_analysis_static(analysis_parent,
                               analysis,
                               flush_io=True,
                               force_save=False,
                               save_unsaved_dependencies=True,
                               mpi_root=0,
                               mpi_comm=None):
        """
        Same as create_analysis(...), but instead of relying on object-level state, this function
        allows additional parameters (specifically the analysis_parent) to be provided as
        input rather than being determined based on self.

        :param analysis_parent: The h5py.Group object or omsi.dataformat.omsi_file.common.omsi_file_common object
            where the analysis should be created
        :param kwargs: Additional keyword arguments for create_analysis(...). See create_analysis(...) for details.

        :return: The output of create_analysis
        """
        if mpi_helper.get_rank(comm=mpi_comm) == mpi_root:
            if isinstance(analysis_parent, h5py.Group):
                parent_group = analysis_parent
            elif isinstance(analysis_parent, omsi_file_common):
                parent_group = analysis_parent.managed_group
            else:
                log_helper.error(
                    __name__,
                    'Illegal analysis_parent type. Expected h5py.Group or omsi_file_common'
                )
                raise ValueError("Illegal value for analysis parent")
            return omsi_file_analysis.__create__(
                parent_group=parent_group,
                analysis=analysis,
                analysis_index=None,  # Same as self.get_num_analysis()
                flush_io=flush_io,
                force_save=force_save,
                save_unsaved_dependencies=save_unsaved_dependencies)
        else:
            try:
                analysis.write_analysis_data()
                return None
            except NotImplementedError:
                pass
Example No. 11
    def create_analysis_static(analysis_parent,
                               analysis,
                               flush_io=True,
                               force_save=False,
                               save_unsaved_dependencies=True,
                               mpi_root=0,
                               mpi_comm=None):
        """
        Same as create_analysis(...), but instead of relying on object-level state, this function
        allows additional parameters (specifically the analysis_parent) to be provided as
        input rather than being determined based on self.

        :param analysis_parent: The h5py.Group object or omsi.dataformat.omsi_file.common.omsi_file_common object
            where the analysis should be created
        :param kwargs: Additional keyword arguments for create_analysis(...). See create_analysis(...) for details.

        :return: The output of create_analysis
        """
        if mpi_helper.get_rank(comm=mpi_comm) == mpi_root:
            if isinstance(analysis_parent, h5py.Group):
                parent_group = analysis_parent
            elif isinstance(analysis_parent, omsi_file_common):
                parent_group = analysis_parent.managed_group
            else:
                log_helper.error(__name__, 'Illegal analysis_parent type. Expected h5py.Group or omsi_file_common')
                raise ValueError("Illegal value for analysis parent")
            return omsi_file_analysis.__create__(parent_group=parent_group,
                                                 analysis=analysis,
                                                 analysis_index=None,  # Same as self.get_num_analysis()
                                                 flush_io=flush_io,
                                                 force_save=force_save,
                                                 save_unsaved_dependencies=save_unsaved_dependencies)
        else:
            try:
                analysis.write_analysis_data()
                return None
            except NotImplementedError:
                pass
Example No. 12
    def main(self):
        """
        Default main function for running an analysis from the command line.
        The default implementation exposes all specified analysis parameters as command
        line options to the user. The default implementation also provides means to
        print a help text for the function.

        :raises: ValueError is raised in case that the analysis class is unknown

        """

        # Initialize the argument parser
        if self.parser is None:
            self.initialize_argument_parser()

        try:
            # Parse the command line arguments to determine the command line driver settings
            self.parse_cl_arguments()
        except:
            self.remove_output_target()
            raise

        if self.workflow_executor is None:
            self.remove_output_target()
            log_helper.error(
                __name__,
                'Missing --script parameter or workflow_executor object')
            raise ValueError('Workflow not initialized')

        # Add and parse the command line arguments specific to the analysis to determine the analysis settings
        try:
            self.add_and_parse_workflow_arguments()
        except:
            self.remove_output_target()
            raise

        # Print the analysis settings
        if mpi_helper.get_rank() == self.mpi_root:
            self.print_settings()

        # Enable time and usage profiling
        try:
            # Enable time and usage profiling if requested
            if self.profile_analyses:
                try:
                    self.workflow_executor.analysis_tasks.enable_time_and_usage_profiling(
                        self.profile_analyses)
                except ImportError as e:
                    log_helper.warning(
                        __name__,
                        "Profiling of time and usage not available due to missing packages."
                    )
                    log_helper.warning(__name__, e.message)
            # Enable memory profiling if requested
            if self.profile_analyses_mem:
                try:
                    self.workflow_executor.analysis_tasks.enable_memory_profiling(
                        self.profile_analyses_mem)
                except ImportError as e:
                    log_helper.warning(
                        __name__,
                        "Profiling of memory usage not available due to missing packages"
                    )
                    log_helper.warning(__name__, e.message)
        except:
            if mpi_helper.get_rank() == self.mpi_root:
                self.remove_output_target()
            raise

        # Execute the analysis
        try:
            log_helper.debug(__name__,
                             'Analysis arguments: ' +
                             str(self.analysis_arguments),
                             root=self.mpi_root,
                             comm=self.mpi_comm)
            self.workflow_executor.execute()
        except:
            if mpi_helper.get_rank() == self.mpi_root:
                self.remove_output_target()
            raise

        # Finalize the saving of results on our mpi root rank. NOTE: When running in serial
        # the condition mpi_helper.get_rank() == self.mpi_root evaluates to True because
        # our mpi_root is 0 and the mpi_helper returns 0 for the rank when running in serial.
        if mpi_helper.get_rank() == self.mpi_root:

            # Print usage profiles if available
            try:
                self.print_time_and_usage_profiles()
            except:
                log_helper.error(
                    __name__,
                    "An error occured while trying to print time and usage profiles",
                    root=self.mpi_root,
                    comm=self.mpi_comm)

            # Print memory profile data if available
            try:
                self.print_memory_profiles()
            except:
                log_helper.error(
                    __name__,
                    "An error occured while trying to print memory profiles",
                    root=self.mpi_root,
                    comm=self.mpi_comm)

            # Print the time it took to run the analysis
            try:
                # Parallel case: We need to compile/collect timing data from all cores
                if isinstance(
                        self.workflow_executor.run_info['execution_time'],
                        list):
                    # Time for each task to execute
                    log_helper.info(
                        __name__,
                        "Time in seconds for each analysis process: " +
                        str(self.workflow_executor.run_info['execution_time']),
                        root=self.mpi_root,
                        comm=self.mpi_comm)
                    # Start times of each task
                    log_helper.info(
                        __name__,
                        "Time when each of the processes started: " +
                        str(self.workflow_executor.run_info['start_time']),
                        root=self.mpi_root,
                        comm=self.mpi_comm)
                    # Stop times for each task

                    log_helper.info(
                        __name__,
                        "Time when each of the processes finished: " +
                        str(self.workflow_executor.run_info['end_time']),
                        root=self.mpi_root,
                        comm=self.mpi_comm)

                    # Compile the time to execute string
                    exec_time_array = np.asarray(
                        self.workflow_executor.run_info['execution_time'],
                        dtype=float)
                    max_exec_time = str(exec_time_array.max())
                    min_exec_time = str(exec_time_array.min())
                    mean_exec_time = str(exec_time_array.mean())
                    exec_time_string = max_exec_time + " s " + \
                        "    ( min = " + min_exec_time + " , mean = " + mean_exec_time + " )"
                # Serial case: We only have a single time to worry about
                else:
                    exec_time_string = str(self.workflow_executor.
                                           run_info['execution_time']) + " s"
                log_helper.info(__name__,
                                "Time to execute analysis: " +
                                exec_time_string,
                                root=self.mpi_root,
                                comm=self.mpi_comm)
            except:
                raise

        # Save the analysis to file
        if self.output_target is not None:
            from omsi.dataformat.omsi_file.analysis import omsi_analysis_manager
            for analysis in self.workflow_executor.analysis_tasks:
                omsi_analysis_manager.create_analysis_static(
                    analysis_parent=self.output_target, analysis=analysis)
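The per-process timing summary assembled above reduces to a few numpy calls; here is a self-contained sketch with made-up values.

import numpy as np

execution_times = [12.1, 13.4, 11.8, 12.9]   # made-up per-rank times in seconds
exec_time_array = np.asarray(execution_times, dtype=float)
exec_time_string = str(exec_time_array.max()) + " s " + \
    "    ( min = " + str(exec_time_array.min()) + " , mean = " + str(exec_time_array.mean()) + " )"
print("Time to execute analysis: " + exec_time_string)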
Example No. 13
    def __init__(self, hdr_filename=None, t2m_filename=None, img_filename=None, basename=None, requires_slicing=True):
        """Open an img file for data reading.

            :param hdr_filename: The name of the hdr header file
            :type hdr_filename: string

            :param t2m_filename: The name of the t2m file (stores the m/z axis values)
            :type t2m_filename: string

            :param img_filename: The name of the img data file
            :type img_filename: string

            :param basename: Instead of img_filename, t2m_filename, and hdr_filename one may also supply just
                             a single basename. The basename is completed with the .img, .t2m, .hdr extension
                             to load the data.
            :type basename: string

            :param requires_slicing: Unused here. Slicing is always supported by this reader.
            :type requires_slicing: Boolean

            :raises ValueError: In case that both basename and the hdr_filename, t2m_filename, and img_filename parameters are specified.
        """
        super(img_file, self).__init__(basename, requires_slicing)
        self.data_type = 'uint16'
        self.shape = [0, 0, 0]  # Number of pixels in x,y, and z. NOTE: Type changed to tuple later on.
        self.mz = 0  # A numpy vector with the m/z values of the instrument

        if basename and hdr_filename and t2m_filename and img_filename:
            raise ValueError(
                "Conflicting input. Provide either basename or the " +
                "hdr_filename,t2m_filename,img_filename parameters but not both.")
        if basename:
            basefile = basename
            if os.path.isdir(basename):
                filelist = self.get_files_from_dir(basename)
                log_helper.log_var(__name__, filelist=filelist)
                if len(filelist) > 0:
                    basefile = filelist[0]
                else:
                    raise ValueError("No valid img file found in the given directory.")
            elif basefile.endswith(".img") and os.path.exists(basefile):
                basefile = basefile.rstrip(".img")
            elif basefile.endswith(".hdr") and os.path.exists(basefile):
                basefile = basefile.rstrip(".hdr")
            elif basefile.endswith(".t2m") and os.path.exists(basefile):
                basefile = basefile.rstrip(".t2m")

            log_helper.log_var(__name__, basefile=basefile)
            if os.path.exists(basefile + ".hdr") and \
                    os.path.exists(basefile + ".t2m") and \
                    os.path.exists(basefile + ".img"):
                hdr_filename = basefile + ".hdr"
                t2m_filename = basefile + ".t2m"
                img_filename = basefile + ".img"
            else:
                raise ValueError("No valid img file found for the given basename.")
        elif hdr_filename and t2m_filename and img_filename:
            pass  # Nothing to be done
        else:
            raise ValueError("Missing input parameter. Either provide: " +
                             " i) basename or ii) hdr_filename, t2m_filename, img_filename")

        # Initialize the x and y length
        hdr = open(hdr_filename, 'rb')
        hdrdata = np.fromfile(file=hdr_filename, dtype='int16', count=-1)
        self.shape[0] = int(hdrdata[23])
        self.shape[1] = int(hdrdata[22])
        hdr.close()

        # Initialize the z length
        t2m = open(t2m_filename, 'rb')
        self.mz = np.fromfile(file=t2m, dtype='float32', count=-1)
        self.shape[2] = self.mz.shape[0]
        t2m.close()

        # Convert the shape variable to the expected tuple
        self.shape = tuple(self.shape)

        # Open the img file with the spectrum data
        self.img_filename = img_filename
        self.file_opened = False
        try:
            self.m_img_file = np.memmap(filename=self.img_filename,
                                        dtype=self.data_type,
                                        shape=self.shape,
                                        mode='r',
                                        order='C')
            self.file_opened = True
        except ValueError:
            # Check if the size of the file matches what we expect
            imgsize = os.stat(self.img_filename).st_size
            itemsize = np.dtype(self.data_type).itemsize
            expectednumvalues = int(self.shape[0]) * int(self.shape[1]) * int(self.shape[2])
            expectedsize = expectednumvalues * int(itemsize)
            sizedifference = expectedsize - imgsize
            log_helper.warning(__name__, "IMG size: " + str(imgsize) + " Expected size: " +
                               str(expectedsize) + "  (difference=" + str(sizedifference) + ")")
            if imgsize < expectedsize:
                # Check whether the missing data aligns with images or spectra
                slicesize = int(self.shape[0]) * int(self.shape[1]) * itemsize
                spectrumsize = int(self.shape[2]) * itemsize
                percentmissing = float(sizedifference)/float(expectedsize)
                valuesmissing = float(sizedifference) / itemsize
                warnings.warn("WARNING: Missing "+str(sizedifference) +
                              " bytes in img file (missing " + str(valuesmissing) +
                              " intensity values; "+str(percentmissing)+"%)." +
                              " Expected shape: "+str(self.shape))
                # Define how we should deal with the error
                expandslice = (sizedifference % slicesize) == 0
                expandspectra = (sizedifference % spectrumsize) == 0
                if not expandslice:
                    expandspectra = True
                # Complete missing spectra
                if expandspectra:
                    warnings.warn("Dealing with missing data in img file by completing last spectra with 0's.")
                    # TODO np.require create an in-memory copy of the full data. Allow usage of memmap'ed tempfile.
                    tempmap = np.require(np.memmap(filename=self.img_filename,
                                                   dtype=self.data_type,
                                                   mode='r',
                                                   order='C'),
                                         requirements=['O', 'C'])
                    # Extend the memmap to the expected size
                    tempmap.resize((expectednumvalues, ))
                    # Reshape the memmap to the expected shape
                    self.m_img_file = tempmap.reshape(self.shape, order='C')
                    self.file_opened = True
                # Complete missing slices
                elif expandslice:
                    slicesmissing = sizedifference // slicesize
                    self.mz = self.mz[:(-slicesmissing)]
                    warnings.warn("Dealing with missing data in img file by updating the m/z axis." +
                                  " It looks like the m/z axis data may be inconsistent" +
                                  " with the binary data. Removing " + str(slicesmissing) +
                                  " bins from the m/z axis.")
                    self.shape = list(self.shape)
                    self.shape[2] = self.mz.shape[0]
                    self.shape = tuple(self.shape)
                    self.m_img_file = np.memmap(filename=self.img_filename,
                                                dtype=self.data_type,
                                                shape=self.shape,
                                                mode='r',
                                                order='C')
                    self.file_opened = True
                else:
                    raise
            else:
                raise
        except:
            log_helper.error(__name__, "Error while opening the img file: " + img_filename)
            raise
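The recovery path for a truncated img file amounts to padding the missing trailing values with zeros; a self-contained sketch with a made-up shape and a deliberately short file.

import numpy as np

shape = (4, 3, 5)                                    # made-up (x, y, m/z) shape
data_type = np.dtype('uint16')
np.arange(np.prod(shape) - 7, dtype=data_type).tofile('example.img')   # write a file 7 values short

expected_num_values = int(np.prod(shape))
file_values = np.fromfile('example.img', dtype=data_type)
if file_values.size < expected_num_values:
    # Complete the missing spectra with 0's, then reshape to the expected cube
    padded = np.zeros(expected_num_values, dtype=data_type)
    padded[:file_values.size] = file_values
    data = padded.reshape(shape, order='C')
else:
    data = file_values.reshape(shape, order='C')
print(data.shape)   # (4, 3, 5)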
Example No. 14
    def add_metadata(self, metadata):
        """
        Add a new metadata entry

        :param metadata: Instance of `omsi.shared.metadata_data.metadata_value` or
            describing `omsi.shared.metadata_data.metadata_dict` with the
            metadata to be added.

        """
        if metadata is None:
            return

        if isinstance(metadata, metadata_dict):
            for metadata_val in metadata.values():
                self.add_metadata(metadata_val)
            return

        metadata_dataset = None
        if isinstance(metadata['value'], basestring):
            metadata_dataset = self.managed_group.require_dataset(name=unicode(metadata['name']),
                                                                  shape=(1,),
                                                                  dtype=omsi_format_common.str_type)
            metadata_dataset[0] = unicode(metadata['value']) if \
                omsi_format_common.str_type_unicode else \
                str(metadata['value'])
        else:
            md_value = metadata['value']
            if md_value is not None:
                # Save different data types
                if isinstance(md_value, dict):
                    try:
                        json_value = json.dumps(md_value)
                        self.managed_group[metadata['name']] = json_value
                        isjson_attr_name = omsi_format_metadata_collection.is_json_dict_attribute
                        self.managed_group[metadata['name']].attrs[isjson_attr_name] = True
                        metadata_dataset = self.managed_group[metadata['name']]
                    except:
                        log_helper.error(__name__, "Metadata object for " + str(metadata['name'] +
                                         "is not JSON serializable"))
                        metadata_dataset = None

                elif isinstance(md_value, list):
                    try:
                        self.managed_group[metadata['name']] = np.asarray(metadata['value'])
                        metadata_dataset = self.managed_group[metadata['name']]
                    except:
                        log_helper.error(__name__, "Conversion and saving of list metadata failed for: " +
                                         str(metadata['name']))
                        metadata_dataset = None
                else:
                    self.managed_group[metadata['name']] = metadata['value']
                    metadata_dataset = self.managed_group[metadata['name']]
            else:
                self.managed_group[metadata['name']] = float_nan
                metadata_dataset = self.managed_group[metadata['name']]

        if metadata_dataset is not None:
            metadata_dataset.attrs[omsi_format_metadata_collection.description_value_attribute] = metadata['description']
            if metadata['unit'] is not None:
                metadata_dataset.attrs[omsi_format_metadata_collection.unit_value_attribute] = metadata['unit']
            if metadata['ontology'] is not None:
                ontology_value = metadata['ontology'] if isinstance(metadata['ontology'], basestring) \
                    else json.dumps(metadata['ontology'])
                metadata_dataset.attrs[omsi_format_metadata_collection.ontology_value_attribute] = ontology_value
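The JSON-dict storage pattern above, reduced to plain h5py; the file name and the is_json_dict attribute name here are illustrative, while the real attribute names come from omsi_format_metadata_collection.

import json
import h5py

with h5py.File('example_meta.h5', 'w') as h5_file:
    value = {'instrument': 'example', 'polarity': 'positive'}
    h5_file['acquisition'] = json.dumps(value)            # store the dict as a JSON string
    h5_file['acquisition'].attrs['is_json_dict'] = True   # flag it for readers

with h5py.File('example_meta.h5', 'r') as h5_file:
    raw = h5_file['acquisition'][()]                      # may come back as bytes
    restored = json.loads(raw)                            # back to a Python dict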
Example No. 15
    def parse_cl_arguments(self):
        """
        The function assumes that the command line parser has been set up using the initialize_argument_parser(...) function.

        This function parses all arguments that are specific to the command-line parser itself. Analysis workflow
        arguments are added and parsed later by the add_and_parse_workflow_arguments(...) function.
        The reason for this is two-fold: i) to separate the parsing of analysis arguments and arguments of the
        command-line driver and ii) if the same HDF5 file is used as input and output target, then we need to
        open it first here in append mode before it gets opened in read mode later by the arguments.

        *Side effects:* The function sets:

            - ``self.output_target``
            - ``self.profile_analyses``

        """
        # Parse the arguments and convert them to a dict using vars
        parsed_arguments = vars(self.parser.parse_known_args()[0])

        # Process the --save argument to determine where we should save the output
        if self.output_save_arg_name in parsed_arguments and mpi_helper.get_rank() == self.mpi_root:
            # Determine the filename and experiment group from the path
            self.output_target = parsed_arguments.pop(
                self.output_save_arg_name)
            if self.output_target is not None:
                output_filename, output_object_path = omsi_file_common.parse_path_string(
                    self.output_target)
                # Create the output file
                if output_filename is None:
                    raise ValueError(
                        "ERROR: Invalid save parameter specification " +
                        self.output_target)
                elif os.path.exists(output_filename) and not os.path.isfile(output_filename):
                    raise ValueError(
                        "ERROR: Save parameter does not specify a file.")
                if not os.path.exists(output_filename):
                    out_file = omsi_file(output_filename, mode='a')
                    self.output_target = out_file.create_experiment()
                    self.__output_target_self = output_filename
                else:
                    out_file = omsi_file(output_filename, mode='r+')
                    if output_object_path is not None:
                        self.output_target = omsi_file_common.get_omsi_object(
                            out_file[output_object_path])
                    else:
                        if out_file.get_num_experiments() > 0:
                            self.output_target = out_file.get_experiment(0)
                        else:
                            self.output_target = out_file.create_experiment()
        else:
            self.output_target = parsed_arguments.pop(
                self.output_save_arg_name)

        # Process the --profile profiling argument
        if self.profile_arg_name in parsed_arguments:
            self.profile_analyses = parsed_arguments.pop(self.profile_arg_name)

        # Process the --memprofile argument
        if self.profile_mem_arg_name in parsed_arguments:
            self.profile_analyses_mem = parsed_arguments.pop(
                self.profile_mem_arg_name)

        # The --loglevel argument
        if self.log_level_arg_name in parsed_arguments:
            self.user_log_level = parsed_arguments.pop(self.log_level_arg_name)
            if self.user_log_level in log_helper.log_levels.keys():
                log_helper.set_log_level(
                    level=log_helper.log_levels[self.user_log_level])
            else:
                self.user_log_level = None
                log_helper.error(module_name=__name__,
                                 message="Invalid log level specified")

        # The --script arguments
        if self.script_arg_name in parsed_arguments:
            self.script_files = parsed_arguments.pop(self.script_arg_name)
            if self.workflow_executor is None:
                self.create_workflow_executor_object()
            else:
                self.workflow_executor.add_analysis_from_scripts(
                    script_files=self.script_files)
Example No. 16
"""
MIDAS spectrum analysis
"""

# TODO  Add support for using centroided MSI data directly by removing 0's from the intensity and mz array
# TODO  We can further parallelize the calculations by splitting the compound list up as well (not just the spectra)

from omsi.analysis.base import analysis_base
import omsi.shared.mpi_helper as mpi_helper
from omsi.shared.log import log_helper
try:
    from pactolus import score_frag_dag
except ImportError:
    log_helper.error(
        __name__,
        "Import of Pactolus failed. The  omsi_score_pactolus module will not work."
    )
import os
import numpy as np
import time
import sys


class omsi_score_pactolus(analysis_base):
    """
    Class for executing Pactolus on a local peak finding dataset.
    """
    def __init__(self, name_key="undefined"):
        """
        Initialize the basic data members
        """
Example No. 17
    def get_analysis_class_from_cl(self):
        """
        Internal helper function used to get the analysis class object based on the
        analysis_class_arg_name positional argument from the command line.

        *Side effects:* The function sets ``self.analysis_class``

        :raises: ImportError in case that the analysis module cannot be loaded
        :raises: AttributeError in case that the analysis class cannot be extracted from the module
        """
        if len(sys.argv) < 2 or sys.argv[1].startswith('--'):
            raise ValueError("Missing required input argument defining the analysis to be executed missing")

        # Get the analysis class we need to operate on as the first positional argument
        # Get the name of the analysis class and format the string to remove common formatting problems.
        analysis_script = sys.argv[1].replace('/', '.')
        if analysis_script.endswith('.py'):
            analysis_script = analysis_script[:-3]  # remove the ".py" suffix (rstrip strips characters, not a suffix)
        if analysis_script.startswith('.'):
            analysis_script = analysis_script.lstrip('.')

        # Determine the name of the module and name from the string
        analysis_class_name = analysis_script.split('.')[-1]
        analysis_module_name = analysis_script[:-(len(analysis_class_name) + 1)]  # module path without the class name
        if not analysis_module_name.startswith('omsi.analysis'):
            analysis_module_name = 'omsi.analysis.' + analysis_module_name

        # Import the module
        try:
            analysis_module_object = __import__(analysis_module_name, globals(), locals(), [analysis_class_name], -1)
        except ImportError as e:
            log_helper.error(__name__, e.message)
            log_helper.error(__name__, "Could not locate module " + analysis_module_name,
                             root=self.mpi_root, comm=self.mpi_comm)
            log_helper.error(__name__, "Please check the name of the module. Maybe there is a spelling error.",
                             root=self.mpi_root, comm=self.mpi_comm)
            raise

        # Determine the self.analysis parameter
        try:
            self.analysis_class = getattr(analysis_module_object, analysis_class_name)
        except AttributeError as e:
            log_helper.error(__name__, e.message)
            log_helper.error(__name__, "Could not locate " + analysis_class_name + " in " + analysis_module_name,
                             root=self.mpi_root, comm=self.mpi_comm)
            log_helper.error(__name__, "Please check the name of the analysis. Maybe there is a spelling error.",
                             root=self.mpi_root, comm=self.mpi_comm)
            raise
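The dynamic class lookup above can be reproduced with importlib; a self-contained sketch that uses standard-library names instead of an omsi analysis module.

import importlib

module_name = 'collections'          # stands in for e.g. an 'omsi.analysis.*' module path
class_name = 'OrderedDict'           # stands in for the analysis class name
module_object = importlib.import_module(module_name)
analysis_class = getattr(module_object, class_name)
print(analysis_class)                # <class 'collections.OrderedDict'>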
Example No. 18
"""
MIDAS spectrum analysis
"""

# TODO  Add support for using centroided MSI data directly by removing 0's from the intensity and mz array
# TODO  We can further parallelize the calculations by splitting the compound list up as well (not just the spectra)

from omsi.analysis.base import analysis_base
import omsi.shared.mpi_helper as mpi_helper
from omsi.shared.log import log_helper
try:
    from pactolus import score_frag_dag
except ImportError:
    log_helper.error(__name__, "Import of Pactolus failed. The  omsi_score_pactolus module will not work.")
import os
import numpy as np
import time
import sys

class omsi_score_pactolus(analysis_base):
    """
    Class for executing Pactolus on a local peak finding dataset.
    """

    def __init__(self, name_key="undefined"):
        """
        Initialize the basic data members
        """
        super(omsi_score_pactolus, self).__init__()
        self.analysis_identifier = name_key
        dtypes = self.get_default_dtypes()
Example No. 19
    def __init__(self,
                 hdr_filename=None,
                 t2m_filename=None,
                 img_filename=None,
                 basename=None,
                 requires_slicing=True):
        """Open an img file for data reading.

            :param hdr_filename: The name of the hdr header file
            :type hdr_filename: string

            :param t2m_filename: The name of the t2m file (stores the m/z axis values)
            :type t2m_filename: string

            :param img_filename: The name of the img data file
            :type img_filename: string

            :param basename: Instead of img_filename, t2m_filename, and hdr_filename one may also supply just
                             a single basename. The basename is completed with the .img, .t2m, .hdr extension
                             to load the data.
            :type basename: string

            :param requires_slicing: Unused here. Slicing is always supported by this reader.
            :type requires_slicing: Boolean

            :raises ValueError: In case that both basename and the hdr_filename, t2m_filename, and img_filename parameters are specified.
        """
        super(img_file, self).__init__(basename, requires_slicing)
        self.data_type = 'uint16'
        self.shape = [
            0, 0, 0
        ]  # Number of pixels in x,y, and z. NOTE: Type changed to tuple later on.
        self.mz = 0  # A numpy vector with the m/z values of the instrument

        if basename and hdr_filename and t2m_filename and img_filename:
            raise ValueError(
                "Conflicting input. Provide either basename or the " +
                "hdr_filename,t2m_filename,img_filename parameters but not both."
            )
        if basename:
            basefile = basename
            if os.path.isdir(basename):
                filelist = self.get_files_from_dir(basename)
                log_helper.log_var(__name__, filelist=filelist)
                if len(filelist) > 0:
                    basefile = filelist[0]
                else:
                    raise ValueError(
                        "No valid img file found in the given directory.")
            elif basefile.endswith(".img") and os.path.exists(basefile):
                basefile = basefile.rstrip(".img")
            elif basefile.endswith(".hdr") and os.path.exists(basefile):
                basefile = basefile.rstrip(".hdr")
            elif basefile.endswith(".t2m") and os.path.exists(basefile):
                basefile = basefile.rstrip(".t2m")

            log_helper.log_var(__name__, basefile=basefile)
            if os.path.exists(basefile + ".hdr") and \
                    os.path.exists(basefile + ".t2m") and \
                    os.path.exists(basefile + ".img"):
                hdr_filename = basefile + ".hdr"
                t2m_filename = basefile + ".t2m"
                img_filename = basefile + ".img"
            else:
                raise ValueError(
                    "No valid img file found for the given basename.")
        elif hdr_filename and t2m_filename and img_filename:
            pass  # Nothing to be done
        else:
            raise ValueError(
                "Missing input parameter. Either provide: " +
                " i) basename or ii) hdr_filename, t2m_filename, img_filename")

        # Initialize the x and y length
        hdr = open(hdr_filename, 'rb')
        hdrdata = np.fromfile(file=hdr_filename, dtype='int16', count=-1)
        self.shape[0] = int(hdrdata[23])
        self.shape[1] = int(hdrdata[22])
        hdr.close()

        # Initialize the z length
        t2m = open(t2m_filename, 'rb')
        self.mz = np.fromfile(file=t2m, dtype='float32', count=-1)
        self.shape[2] = self.mz.shape[0]
        t2m.close()

        # Convert the shape variable to the expected tuple
        self.shape = tuple(self.shape)

        # Open the img file with the spectrum data
        self.img_filename = img_filename
        self.file_opened = False
        try:
            self.m_img_file = np.memmap(filename=self.img_filename,
                                        dtype=self.data_type,
                                        shape=self.shape,
                                        mode='r',
                                        order='C')
            self.file_opened = True
        except ValueError:
            # Check if the size of the file matches what we expect
            imgsize = os.stat(self.img_filename).st_size
            itemsize = np.dtype(self.data_type).itemsize
            expectednumvalues = int(self.shape[0]) * int(self.shape[1]) * int(
                self.shape[2])
            expectedsize = expectednumvalues * int(itemsize)
            sizedifference = expectedsize - imgsize
            log_helper.warning(__name__, "IMG size: " + str(imgsize) + " Expected size: " +
                               str(expectedsize) + "  (difference=" + str(sizedifference) + ")")
            if imgsize < expectedsize:
                # Check whether the missing data aligns with images or spectra
                slicesize = int(self.shape[0]) * int(self.shape[1]) * itemsize
                spectrumsize = int(self.shape[2]) * itemsize
                percentmissing = float(sizedifference) / float(expectedsize)
                valuesmissing = float(sizedifference) / itemsize
                warnings.warn("WARNING: Missing " + str(sizedifference) +
                              " bytes in img file (missing " +
                              str(valuesmissing) + " intensity values; " +
                              str(percentmissing) + "%)." +
                              " Expected shape: " + str(self.shape))
                # Define how we should deal with the error
                expandslice = (sizedifference % slicesize) == 0
                expandspectra = (sizedifference % spectrumsize) == 0
                if not expandslice:
                    expandspectra = True
                # Complete missing spectra
                if expandspectra:
                    warnings.warn(
                        "Dealing with missing data in img file by completing last spectra with 0's."
                    )
                    # TODO np.require create an in-memory copy of the full data. Allow usage of memmap'ed tempfile.
                    tempmap = np.require(np.memmap(filename=self.img_filename,
                                                   dtype=self.data_type,
                                                   mode='r',
                                                   order='C'),
                                         requirements=['O', 'C'])
                    # Extend the memmap to the expected size
                    tempmap.resize((expectednumvalues, ))
                    # Reshape the memmap to the expected shape
                    self.m_img_file = tempmap.reshape(self.shape, order='C')
                    self.file_opened = True
                # Complete missing slices
                elif expandslice:
                    slicesmissing = sizedifference // slicesize
                    self.mz = self.mz[:(-slicesmissing)]
                    warnings.warn(
                        "Dealing with missing data in img file by updating the m/z axis." +
                        " It looks like the m/z axis data may be inconsistent" +
                        " with the binary data. Removing " +
                        str(slicesmissing) + " bins from the m/z axis.")
                    self.shape = list(self.shape)
                    self.shape[2] = self.mz.shape[0]
                    self.shape = tuple(self.shape)
                    self.m_img_file = np.memmap(filename=self.img_filename,
                                                dtype=self.data_type,
                                                shape=self.shape,
                                                mode='r',
                                                order='C')
                    self.file_opened = True
                else:
                    raise
            else:
                raise
        except:
            log_helper.error(
                __name__, "Error while opening the img file: " + img_filename)
            raise
Example No. 20
"""
MIDAS spectrum analysis
"""

# TODO  Add support for using centroided MSI data directly by removing 0's from the intensity and mz array
# TODO  We can further parallelize the calculations by splitting the compound list up as well (not just the spectra)

from omsi.analysis.base import analysis_base
import omsi.shared.mpi_helper as mpi_helper
from omsi.shared.log import log_helper
try:
    import MIDAS
except ImportError:
    log_helper.error(__name__, "Import of MIDAS failed. The omsi_score_midas module will not work.")
import os
import numpy as np
import time
import sys


class omsi_score_midas(analysis_base):
    """
    Class for executing midas on an MSI or local peak finding dataset.
    """

    def __init__(self, name_key="undefined"):
        """
        Initialize the basic data members
        """
        super(omsi_score_midas, self).__init__()
        self.analysis_identifier = name_key