Python MetadataError примеры использования

Язык программирования: Python

Пространство имен/Пакет: tmlib.errors

Класс/Тип: MetadataError

Примеров на hotexamples.com: 8

Python MetadataError - 8 примеров найдено. Это лучшие примеры Python кода для tmlib.errors.MetadataError, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

MetadataError(8)

Основные методы

MetadataError (8)

Пример #1

Показать файл

Файл: api.py Проект: TissueMAPS/TmLibrary

    def run_job(self, batch, assume_clean_state=False):
        '''Extracts the OMEXML document of each microscope image file listed
        in `batch` and stores it on the corresponding database record.

        Parameters
        ----------
        batch: dict
            description of the *run* job; the IDs of the files to process are
            read from the key ``"microscope_image_file_ids"``
        assume_clean_state: bool, optional
            assume that output of previous runs has already been cleaned up
            (not evaluated by this method)

        Note
        ----
        The actual processing is delegated to the
        `showinf <http://www.openmicroscopy.org/site/support/bio-formats5.1/users/comlinetools/display.html>`_
        Bioformats command line tool, which is invoked once per image file.

        Raises
        ------
        MetadataError
            when `showinf` exits with a non-zero status or writes nothing to
            standard output
        ValueError
            when the output lacks an ``<OME`` start tag or an ``</OME>``
            closing tag
        '''
        # NOTE: Reading via BFOmeXmlReader and JavaBridge would be preferable,
        # but that approach has several shortcomings and needs too much
        # memory to run efficiently on individual cores.
        with tm.utils.ExperimentSession(self.experiment_id) as session:
            for file_id in batch['microscope_image_file_ids']:
                image_file = session.query(tm.MicroscopeImageFile).get(file_id)
                logger.info('process image %d' % image_file.id)
                # "showinf" prints the extracted OMEXML on standard output.
                flags = [
                    '-omexml-only', '-nopix', '-novalid', '-nocore',
                    '-no-upgrade', '-no-sas',
                ]
                process = subprocess.Popen(
                    ['showinf'] + flags + [image_file.location],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE
                )
                output, errors = process.communicate()
                logger.debug("showinf STDOUT: \n```%s```\n", output)
                logger.debug("showinf STDERR: \n```%s```\n", errors)
                extraction_failed = process.returncode != 0 or not output
                if extraction_failed:
                    raise MetadataError(
                        'Extraction of OMEXML failed! Error message:\n%s' %
                        errors)
                # The document is enclosed by the tags `<OME ...>` and
                # `</OME>`; anything printed around them is discarded.
                xml_start = output.find("<OME")
                if xml_start == -1:
                    raise ValueError(
                        "Cannot find OME-XML start tag in `showinf` output.")
                closing_tag_start = output.rfind("</OME>", xml_start)
                if closing_tag_start == -1:
                    raise ValueError(
                        "Cannot find OME-XML closing tag in `showinf` output.")
                xml_end = closing_tag_start + len('</OME>')
                image_file.omexml = unicode(output[xml_start:xml_end])
                # Persist the record right away and detach it from the
                # session before the next file is processed.
                session.add(image_file)
                session.commit()
                session.expunge(image_file)

Пример #2

Показать файл

Файл: api.py Проект: sparkvilla/TmLibrary

    def run_job(self, batch, assume_clean_state=False):
        '''Extracts OMEXML from microscope image or metadata files.

        Parameters
        ----------
        batch: dict
            description of the *run* job; the IDs of the files to process are
            read from the key ``"microscope_image_file_ids"``
        assume_clean_state: bool, optional
            assume that output of previous runs has already been cleaned up
            (not evaluated by this method)

        Note
        ----
        The actual processing is delegated to the
        `showinf <http://www.openmicroscopy.org/site/support/bio-formats5.1/users/comlinetools/display.html>`_
        Bioformats command line tool.

        Raises
        ------
        MetadataError
            when `showinf` exits with a non-zero status or writes nothing to
            standard output
        RegexError
            when the output of `showinf` does not contain an XML element
        '''
        # NOTE: Ideally, we would use the BFOmeXmlReader together with JavaBridge
        # but this approach has several shortcomings and requires too much
        # memory to run efficiently on individual cores.
        with tm.utils.ExperimentSession(self.experiment_id) as session:
            for fid in batch['microscope_image_file_ids']:
                img_file = session.query(tm.MicroscopeImageFile).get(fid)
                logger.info('process image %d', img_file.id)
                # The "showinf" command line tool writes the extracted OMEXML
                # to standard output.
                command = [
                    'showinf', '-omexml-only', '-nopix', '-novalid', '-nocore',
                    '-no-upgrade', '-no-sas', img_file.location
                ]
                p = subprocess.Popen(command,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
                stdout, stderr = p.communicate()
                if p.returncode != 0 or not stdout:
                    raise MetadataError(
                        'Extraction of OMEXML failed! Error message:\n%s' %
                        stderr)
                # We only want the XML. This will remove potential
                # warnings and other stuff we don't want.
                match = re.search(r'<(\w+).*</\1>', stdout, flags=re.DOTALL)
                # Test for the "no match" case explicitly instead of using a
                # bare ``except``, which would also turn unrelated errors
                # (and even KeyboardInterrupt) into a RegexError.
                if match is None:
                    raise RegexError('OMEXML metadata could not be extracted.')
                img_file.omexml = unicode(match.group())
                session.add(img_file)
                session.commit()
                session.expunge(img_file)

Пример #3

Показать файл

    def determine_grid_coordinates_from_layout(self, stitch_layout,
                                               stitch_dimensions):
        '''Determines the coordinates of each image acquisition site within the
        continuous acquisition grid (slide or well in a plate)
        based on a provided layout.

        Parameters
        ----------
        stitch_layout: str
            layout of the acquisition grid
            (options: ``"horizontal"``, ``"zigzag_horizontal"``, ``"vertical"``,
            or ``"zigzag_vertical"``)
        stitch_dimensions: Tuple[int]
            dimensions of the acquisition grid, i.e. number of images
            along the vertical and horizontal axis of the acquired area

        Returns
        -------
        pandas.DataFrame
            metadata for each 2D *Plane* element

        Raises
        ------
        MetadataError
            when the number of acquisition sites differs between groups of
            well, channel, z-plane and time point
        ValueError
            when the number of sites in a group does not match the number of
            coordinates computed for `stitch_dimensions`

        See also
        --------
        :func:`illuminati.stitch.calc_grid_coordinates_from_layout`
        '''
        md = self.metadata

        logger.info('determine acquisition grid coordinates based on layout')
        # Each group holds the planes of one well for a given channel,
        # z-plane and time point, i.e. one plane per acquisition site.
        acquisitions_per_well = md.groupby(
            ['well_name', 'channel_name', 'zplane', 'tpoint'])
        # Non-null entries of the "name" column are counted per group.
        n_acquisitions_per_well = acquisitions_per_well.count().name
        if len(np.unique(n_acquisitions_per_well)) > 1:
            raise MetadataError(
                'Each well must have the same number of acquisition sites.')
        sites = acquisitions_per_well.groups.values()

        logger.debug('stitch layout: {0}; stitch dimensions: {1}'.format(
            stitch_layout, stitch_dimensions))
        coordinates = stitch.calc_grid_coordinates_from_layout(
            stitch_dimensions, stitch_layout)
        y_coordinates = [c[0] for c in coordinates]
        x_coordinates = [c[1] for c in coordinates]
        for indices in sites:
            if len(indices) != len(coordinates):
                raise ValueError('Incorrect stitch dimensions provided.')
            # Coordinates are assigned in the order in which the rows of the
            # group appear in the metadata table.
            md.loc[indices, 'well_position_y'] = y_coordinates
            md.loc[indices, 'well_position_x'] = x_coordinates

        return self.metadata

Пример #4

Показать файл

    def configure_from_filenames(self, plate_dimensions, regex):
        '''Fills in metadata using information encoded in image filenames.
        The values are extracted with a regular expression that provides the
        following named fields:

            - *w*: well
            - *t*: time point
            - *s*: acquisition site
            - *z*: focal plane (z dimension)
            - *c*: channel

        Parameters
        ----------
        plate_dimensions: Tuple[int]
            number of rows and columns in the well plate
            (not evaluated by this method)
        regex: str
            named regular expression

        Raises
        ------
        tmlib.errors.MetadataError
            when image files contain more than one plane, since this case
            wouldn't allow a 1-to-1 mapping of information from filename to
            image plane

        Returns
        -------
        pandas.DataFrame
            metadata for each 2D *Plane* element
        '''
        logger.info('update image metadata with filename information')
        md = self.metadata
        unique_names = {
            name
            for mapper in self._file_mapper_list
            for name in mapper.files
        }
        filenames = natsorted(list(unique_names))
        # One metadata row per file is required for the 1-to-1 mapping.
        if len(filenames) != md.shape[0]:
            raise MetadataError(
                'Configuration of metadata based on filenames '
                'works only when each image file contains only a single plane.'
            )

        logger.info('retrieve metadata from filenames via regular expression')
        self.check_regular_expression(regex)
        # (metadata column, type conversion, regular expression field)
        column_specs = (
            ('channel_name', str, 'c'),
            ('site', int, 's'),
            ('zplane', int, 'z'),
            ('tpoint', int, 't'),
            ('well_name', str, 'w'),
        )
        for row, filename in enumerate(filenames):
            # Not every microscope provides all the information in the filename.
            fields = self.extract_fields_from_filename(regex, filename)
            for column, convert, field in column_specs:
                md.at[row, column] = convert(getattr(fields, field))

        return self.metadata

Пример #5

Показать файл

    def determine_grid_coordinates_from_stage_positions(self):
        '''Determines the coordinates of each image acquisition site within the
        continuous acquisition grid (slide or well in a plate)
        based on the absolute microscope stage positions.

        Returns
        -------
        pandas.DataFrame
            metadata for each 2D *Plane* element

        Raises
        ------
        MetadataError
            when stage position information is not available from `metadata`

        See also
        --------
        :func:`illuminati.stitch.calc_grid_coordinates_from_positions`
        '''
        md = self.metadata
        if (any(md.stage_position_y.isnull())
                or any(md.stage_position_x.isnull())):
            raise MetadataError('Stage position information is not available.')

        logger.info('translate absolute microscope stage positions into '
                    'relative acquisition grid coordinates')

        # Group by a scalar key so that the keys of ``groups`` are plain well
        # names and can be looked up directly.
        planes_per_well = md.groupby('well_name')
        n_tpoints = len(np.unique(md.tpoint))
        n_channels = len(np.unique(md.channel_name))
        n_zplanes = len(np.unique(md.zplane))
        for well_name in np.unique(md.well_name):
            ix = planes_per_well.groups[well_name]
            # Materialize the pairs: ``zip`` is lazy on Python 3 and would
            # not support ``len()``.
            positions = list(zip(md.loc[ix, 'stage_position_y'],
                                 md.loc[ix, 'stage_position_x']))
            # Every position is listed once per time point, channel and
            # z-plane. Floor division keeps the count an integer on Python 3
            # as well.
            n = len(positions) // (n_tpoints * n_channels * n_zplanes)
            coordinates = self._calculate_coordinates(positions, n)
            md.loc[ix, 'well_position_y'] = [c[0] for c in coordinates]
            md.loc[ix, 'well_position_x'] = [c[1] for c in coordinates]

        return self.metadata

Пример #6

Показать файл

    def _combine_omexml_elements(self, omexml_images, omexml_metadata):
        '''Merges the OMEXML elements extracted from the individual image
        files into a single OMEXML element and records for each plane the
        file, series and plane index it originates from.

        Parameters
        ----------
        omexml_images: dict
            OMEXML element extracted from each image file, keyed by filename
        omexml_metadata: bioformats.omexml.OMEXML or None
            OMEXML element obtained from additional metadata files; when
            ``None``, a new element is created from `XML_DECLARATION`

        Returns
        -------
        bioformats.omexml.OMEXML
            `omexml_metadata` (or the newly created element) complemented
            with the values found in `omexml_images`

        Raises
        ------
        TypeError
            when `omexml_metadata` is neither ``None`` nor an instance of
            ``bioformats.omexml.OMEXML``
        MetadataError
            when the number of images in `omexml_metadata` doesn't match the
            number of images expected from `omexml_images`

        Note
        ----
        One mapping object per plane is appended to `_file_mapper_list` and
        to the entry of `_file_mapper_lut` for the respective filename.
        '''
        logger.info('combine OMEXML elements')
        # We assume here that each image file contains the same number of
        # images: the count of the first file is multiplied by the number
        # of files.
        n_images = omexml_images.values()[0].image_count * len(omexml_images)
        if omexml_metadata is not None:
            extra_omexml_available = True
            if not isinstance(omexml_metadata, bioformats.omexml.OMEXML):
                raise TypeError('Argument "omexml_metadata" must have type '
                                'bioformats.omexml.OMEXML.')
            if omexml_metadata.image_count != n_images:
                raise MetadataError(
                    'Number of images in "omexml_metadata" must match '
                    'the total number of Image elements in "omexml_images".')
        else:
            extra_omexml_available = False
            omexml_metadata = bioformats.OMEXML(XML_DECLARATION)
            omexml_metadata.image_count = n_images

        # Names of the attributes that get copied from the extracted elements
        # to the combined element.
        image_element_attributes = {'AcquisitionDate', 'Name'}
        channel_element_attributes = {'Name'}
        pixel_element_attributes = {
            'PixelType', 'SizeC', 'SizeT', 'SizeX', 'SizeY', 'SizeZ'
        }
        plane_element_attributes = {
            'PositionX', 'PositionY', 'PositionZ', 'TheC', 'TheT', 'TheZ'
        }
        # Files are processed in natural sort order; `count` is the running
        # index of the Image element in the combined OMEXML.
        filenames = natsorted(omexml_images)
        count = 0
        for i, f in enumerate(filenames):
            omexml_img = omexml_images[f]
            n_series = omexml_img.image_count
            for s in xrange(n_series):
                extracted_image = omexml_img.image(s)
                md_image = omexml_metadata.image(count)
                # Extracted values overwrite the existing ones unless they
                # are None.
                for attr in image_element_attributes:
                    extracted_value = getattr(extracted_image, attr)
                    if extracted_value is not None:
                        setattr(md_image, attr, extracted_value)

                extracted_pixels = extracted_image.Pixels
                n_planes = extracted_pixels.plane_count
                if n_planes == 0:
                    # Sometimes an image doesn't have any plane elements.
                    # Let's create them for consistency.
                    extracted_pixels = self._create_channel_planes(
                        extracted_pixels)
                    n_planes = extracted_pixels.plane_count

                md_pixels = md_image.Pixels
                md_pixels.plane_count = n_planes
                # NOTE(review): the comparison below runs after "plane_count"
                # was assigned "n_planes" one line above, so it presumably
                # can never be true - confirm whether the check was meant to
                # happen before the assignment.
                if extra_omexml_available and (md_pixels.plane_count !=
                                               n_planes):
                    raise MetadataError(
                        'Image element #%d in OMEXML obtained from additional '
                        'metdata files must have the same number of Plane  '
                        'elements as the corresponding Image elements in the '
                        'OMEXML element obtained from image file "%s".' %
                        (i, f))

                for attr in pixel_element_attributes:
                    extracted_value = getattr(extracted_pixels, attr)
                    if extracted_value is not None:
                        # The combined element may hold default values set by
                        # python-bioformats; replace them with the extracted
                        # values.
                        setattr(md_pixels, attr, extracted_value)

                for p in xrange(n_planes):
                    extracted_plane = extracted_pixels.Plane(p)
                    md_plane = md_pixels.Plane(p)
                    # In contrast to image and pixel attributes, plane
                    # attributes that are already set are left untouched.
                    for attr in plane_element_attributes:
                        extracted_value = getattr(extracted_plane, attr)
                        md_value = getattr(md_plane, attr)
                        if md_value is None and extracted_value is not None:
                            setattr(md_plane, attr, extracted_value)

                    # Remember where the plane came from.
                    # NOTE(review): "count" advances by one per image, so
                    # "count + p" looks like it could repeat across images
                    # that hold more than one plane - confirm.
                    fm = ImageFileMapping()
                    fm.ref_index = count + p
                    fm.files = [f]
                    fm.series = [s]
                    fm.planes = [p]
                    self._file_mapper_list.append(fm)
                    self._file_mapper_lut[f].append(fm)

                n_channels = extracted_pixels.channel_count
                # NOTE(review): the count is read from the Pixels element but
                # assigned to the Image element, while channels are accessed
                # via "md_pixels" below - verify that this is intended.
                md_image.channel_count = n_channels
                for c in xrange(n_channels):
                    extracted_channel = extracted_pixels.Channel(c)
                    md_channel = md_pixels.Channel(c)
                    for attr in channel_element_attributes:
                        extracted_value = getattr(extracted_channel, attr)
                        if extracted_value is not None:
                            setattr(md_channel, attr, extracted_value)

                count += 1

        return omexml_metadata

Пример #7

Показать файл

Файл: api.py Проект: adrtsc/TissueMAPS

    def collect_job_output(self, batch):
        '''Assigns registered image files from different acquisitions to
        separate *cycles*. If an acquisition includes multiple time points,
        a separate *cycle* is created for each time point.
        The mapping from *acquisitions* to *cycles* is consequently
        1 -> n, where n is the number of time points per acquisition (n >= 1).

        Whether acquisition time points will be interpreted as actual
        time points in a time series depends on the value of
        :attr:`tm.Experiment.plate_acquisition_mode`.

        Parameters
        ----------
        batch: dict
            description of the *collect* job (not evaluated by this method)

        Raises
        ------
        tmlib.errors.MetadataError
            when no channels exist or when channels differ in bit depth
        '''
        with tm.utils.ExperimentSession(self.experiment_id) as session:
            # We need to do this per plate to ensure correct indices
            # TODO: check plates have similar channels, etc
            experiment = session.query(tm.Experiment).one()
            acquisition_mode = experiment.plate_acquisition_mode
            logger.info('plates were acquired in mode "%s"', acquisition_mode)
            is_time_series = acquisition_mode == 'basic'
            if is_time_series:
                logger.info('time points are interpreted as time series')
            is_multiplexing = acquisition_mode == 'multiplexing'
            if is_multiplexing:
                logger.info('time points are interpreted as multiplexing cycles')

        with tm.utils.ExperimentSession(self.experiment_id) as session:

            channels = session.query(tm.Channel.name, tm.Channel.id).all()
            channel_lut = dict(channels)

            # Fetch all distinct values. ``one()`` would hand back a single
            # one-column row (or raise), so the length of its result could
            # never reveal diverging bit depths.
            bit_depths = session.query(tm.Channel.bit_depth).distinct().all()
            if not bit_depths:
                raise MetadataError('No channels available.')
            if len(bit_depths) > 1:
                raise MetadataError('All channels must have the same bit depth.')
            bit_depth = bit_depths[0][0]
            wavelengths = session.query(tm.Channel.wavelength).\
                distinct().\
                all()
            wavelengths = [w[0] for w in wavelengths]

            # We order acquisitions by the time they got created. This will
            # determine the order of multiplexing cycles.
            plates = session.query(tm.Plate.id).\
                order_by(tm.Plate.created_at).\
                all()
            plate_ids = [p.id for p in plates]
            for p in plate_ids:
                acquisitions = session.query(tm.Acquisition.id).\
                    filter_by(plate_id=p).\
                    order_by(tm.Acquisition.created_at).\
                    all()
                acquisition_ids = [a.id for a in acquisitions]
                # Both indices start from zero for every plate.
                t_index = 0
                c_index = 0
                for a in acquisition_ids:
                    logger.debug('acquisition %d', a)
                    tpoints = session.query(tm.ChannelImageFile.tpoint).\
                        filter_by(acquisition_id=a).\
                        distinct().\
                        all()
                    tpoints = [t[0] for t in tpoints]
                    for t in tpoints:
                        logger.debug('time point #%d', t)
                        cycle = session.get_or_create(
                            tm.Cycle,
                            index=c_index, experiment_id=self.experiment_id
                        )

                        for w in wavelengths:
                            # Get all channel_image_files for the currently
                            # processed acquisition that match the old values
                            # of the "tpoint" and "channel_id" attributes.
                            image_files = session.query(tm.ChannelImageFile.id).\
                                filter_by(
                                    tpoint=t, acquisition_id=a,
                                    channel_id=channel_lut[w]
                                ).\
                                all()

                            if len(image_files) == 0:
                                # A wavelength might not have been used at
                                # every time point.
                                continue

                            logger.debug('wavelength "%s"', w)
                            if is_multiplexing:
                                # In case of a multiplexing experiment
                                # we create a separate channel for each
                                # combination of wavelength and tpoint.
                                new_channel_name = '{c}_{w}'.format(
                                    c=c_index, w=w
                                )
                            else:
                                # In case of a time series experiment
                                # the name of the channel remains unchanged.
                                new_channel_name = w

                            # Check whether the channel already exists and
                            # update the name accordingly (upon creation, the
                            # "name" attribute should have been set to the
                            # value of the "wavelength" attribute).
                            channel = session.query(tm.Channel).\
                                filter_by(name=w, wavelength=w).\
                                one_or_none()
                            if channel is not None:
                                channel.name = new_channel_name
                                session.add(channel)
                                session.commit()
                            else:
                                channel = tm.Channel(
                                    name=new_channel_name, wavelength=w,
                                    bit_depth=bit_depth,
                                    experiment_id=self.experiment_id
                                )
                                session.add(channel)
                                session.commit()

                            logger.info(
                                'update time point and channel id '
                                'of channel image files: tpoint=%d, channel=%s',
                                t_index, channel.name
                            )
                            # Update the attributes of channel_image_files with
                            # the new values for tpoint and channel_id and also
                            # add the cycle_id.
                            session.bulk_update_mappings(
                                tm.ChannelImageFile, [
                                  {
                                    'id': f.id,
                                    'tpoint': t_index,
                                    'cycle_id': cycle.id,
                                    'channel_id': channel.id
                                  } for f in image_files
                                ]
                            )

                            # Update lookup table
                            channel_lut[new_channel_name] = channel.id

                        # In "basic" mode each time point advances the time
                        # point index; in any other mode it advances the
                        # cycle index.
                        if is_time_series:
                            t_index += 1
                        else:
                            c_index += 1

Пример #8

Показать файл

Файл: api.py Проект: adrtsc/TissueMAPS

    def run_job(self, batch, assume_clean_state=False):
        '''Configures OMEXML metadata extracted from microscope image files and
        complements it with metadata retrieved from additional microscope
        metadata files and/or user input.

        The actual processing is delegated to a format-specific implementation of
        :class:`MetadataHandler <tmlib.workflow.metaconfig.base.MetadataHandler>`.

        Parameters
        ----------
        batch: dict
            job description
        assume_clean_state: bool, optional
            assume that output of previous runs has already been cleaned up
            (not evaluated by this method)

        Raises
        ------
        MetadataError
            when required metadata is still missing after the filenames
            have been evaluated
        TypeError
            when grid coordinates have to be determined from the stitch
            layout, but ``batch["n_vertical"]`` or ``batch["n_horizontal"]``
            is not an integer

        See also
        --------
        :mod:`tmlib.workflow.metaconfig.cellvoyager`
        '''
        # Fall back to the default expression of the microscope type in case
        # the user didn't provide one.
        regexp = batch.get('regex', '')
        if not regexp:
            regexp = get_microscope_type_regex(
                batch['microscope_type'], as_string=True
            )[0]
        with tm.utils.ExperimentSession(self.experiment_id) as session:
            experiment = session.query(tm.Experiment).one()
            plate_dimensions = experiment.plates[0].dimensions
            metadata_files = session.query(tm.MicroscopeMetadataFile.location).\
                filter_by(acquisition_id=batch['acquisition_id']).\
                all()
            metadata_filenames = [f.location for f in metadata_files]
            image_files = session.query(
                    tm.MicroscopeImageFile.name, tm.MicroscopeImageFile.omexml
                ).\
                filter_by(acquisition_id=batch['acquisition_id']).\
                all()
            omexml_images = {
                f.name: bioformats.OMEXML(f.omexml) for f in image_files
            }

        # Not every microscope type comes with a reader for additional
        # metadata files.
        MetadataReader = metadata_reader_factory(batch['microscope_type'])
        if MetadataReader is not None:
            with MetadataReader() as mdreader:
                omexml_metadata = mdreader.read(
                    metadata_filenames, omexml_images.keys()
                )
        else:
            omexml_metadata = None

        MetadataHandler = metadata_handler_factory(batch['microscope_type'])
        mdhandler = MetadataHandler(omexml_images, omexml_metadata)
        mdhandler.configure_from_omexml()
        missing = mdhandler.determine_missing_metadata()
        if missing:
            logger.warning(
                'required metadata information is missing: "%s"',
                '", "'.join(missing)
            )
            logger.info(
                'try to retrieve missing metadata from filenames '
                'using regular expression'
            )
            if regexp is None:
                logger.warning('no regular expression provided')
            mdhandler.configure_from_filenames(
                plate_dimensions=plate_dimensions, regex=regexp
            )
        missing = mdhandler.determine_missing_metadata()
        if missing:
            raise MetadataError(
                'The following metadata information is missing:\n"%s"\n'
                % '", "'.join(missing)
            )
        # Once we have collected basic metadata such as information about
        # channels and focal planes, we try to determine the relative position
        # of images within the acquisition grid
        try:
            logger.info(
                'try to determine grid coordinates from microscope '
                'stage positions'
            )
            mdhandler.determine_grid_coordinates_from_stage_positions()
        except MetadataError as error:
            logger.warning(
                'microscope stage positions are not available: "%s"', error
            )
            logger.info(
                'try to determine grid coordinates from provided stitch layout'
            )
            # In general, the values of these arguments can be ``None``, because
            # they are not required and may not be used.
            # However, in case the grid coordinates should be determined based
            # on user input, these arguments are required.
            if not isinstance(batch['n_vertical'], int):
                raise TypeError(
                    'Value of argument "n_vertical" must be an integer.'
                )
            if not isinstance(batch['n_horizontal'], int):
                raise TypeError(
                    'Value of argument "n_horizontal" must be an integer.'
                )
            mdhandler.determine_grid_coordinates_from_layout(
                stitch_layout=batch['stitch_layout'],
                stitch_dimensions=(batch['n_vertical'], batch['n_horizontal'])
            )

        if batch['perform_mip']:
            mdhandler.group_metadata_per_zstack()

        # Create consistent zero-based ids
        mdhandler.update_indices()
        mdhandler.assign_acquisition_site_indices()
        md = mdhandler.remove_redundant_columns()
        fmaps = mdhandler.create_image_file_mappings()

        logger.info('create database entries')

        with tm.utils.ExperimentSession(self.experiment_id) as session:
            channels = dict()
            # The bit depth of the first row is used for all channels.
            bit_depth = md['bit_depth'][0]
            for ch_name in np.unique(md['channel_name']):
                logger.info('create channel "%s"', ch_name)
                # Upon creation, the wavelength is set to the channel name.
                ch = session.get_or_create(
                    tm.Channel, experiment_id=self.experiment_id,
                    name=ch_name, wavelength=ch_name, bit_depth=bit_depth,
                )
                channels[ch_name] = ch.id

        # Wells are processed in separate sessions.
        for w in np.unique(md.well_name):

            with tm.utils.ExperimentSession(self.experiment_id) as session:
                acquisition = session.query(tm.Acquisition).\
                    get(batch['acquisition_id'])

                logger.info('create well "%s"', w)
                w_index = (md.well_name == w)
                well = session.get_or_create(
                    tm.Well, plate_id=acquisition.plate.id, name=w
                )

                channel_image_files = []
                for s in np.unique(md.loc[w_index, 'site']):
                    logger.debug('create site #%d', s)
                    # NOTE(review): rows are selected by site index only,
                    # which presumes that site indices are unique across
                    # wells - confirm against
                    # "assign_acquisition_site_indices".
                    s_index = (md.site == s)
                    y = md.loc[s_index, 'well_position_y'].values[0]
                    x = md.loc[s_index, 'well_position_x'].values[0]
                    height = md.loc[s_index, 'height'].values[0]
                    width = md.loc[s_index, 'width'].values[0]
                    site = session.get_or_create(
                        tm.Site, y=y, x=x, height=height, width=width,
                        well_id=well.id
                    )

                    # ``.loc`` with a boolean mask selects the same rows as
                    # the ``.ix`` indexer, which newer pandas versions no
                    # longer provide.
                    for index, i in md.loc[s_index].iterrows():
                        channel_image_files.append(
                            tm.ChannelImageFile(
                                tpoint=i.tpoint, zplane=i.zplane,
                                channel_id=channels[i.channel_name],
                                site_id=site.id, acquisition_id=acquisition.id,
                                file_map=fmaps[index],
                            )
                        )

                session.bulk_save_objects(channel_image_files)