Example #1
0
    def __init__(self,
                 data=None,
                 stats=None,
                 metadata=None,
                 time_resolution=None,
                 processing_chain=None,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        data : numpy.ndarray, optional
            Feature data matrix.

        stats : dict, optional
            Statistics of the data.

        metadata : dict, optional
            Metadata associated with the data.

        time_resolution : float, optional
            Time resolution of the data.

        processing_chain : ProcessingChain, optional
            Processing chain that produced the data.

        """
        # Fold the explicitly named arguments into kwargs so both parent
        # initializers receive one consistent parameter set.
        kwargs['data'] = data
        kwargs['stats'] = stats
        kwargs['metadata'] = metadata
        kwargs['time_resolution'] = time_resolution
        kwargs['processing_chain'] = processing_chain

        # Initialize the DataMatrix2DContainer part explicitly
        DataMatrix2DContainer.__init__(self, **kwargs)

        # Continue with the cooperative initializer chain
        super(FeatureContainer, self).__init__(**kwargs)
Example #2
0
    def process(self, data=None, store_processing_chain=False, **kwargs):
        """Process

        Parameters
        ----------
        data : DataContainer
            Data

        store_processing_chain : bool
            Store processing chain to data container returned
            Default value False

        Returns
        -------
        DataMatrix3DContainer

        Raises
        ------
        ValueError
            If data is not an AudioContainer.

        """
        from dcase_util.containers import AudioContainer, DataMatrix2DContainer

        # Guard clause: only AudioContainer input is accepted.
        if not isinstance(data, AudioContainer):
            message = '{name}: Wrong input data type, type required [{input_type}].'.format(
                name=self.__class__.__name__, input_type=self.input_type)

            self.logger.exception(message)
            raise ValueError(message)

        audio_data = data.data
        if data.channels == 1:
            # Promote mono signal to a (1, samples) matrix so it can be
            # treated as a 2D data matrix.
            audio_data = audio_data[numpy.newaxis, :]

        # Wrap the raw samples in a 2D container (time resolution is one
        # sample period) and run the sequencer on it.
        matrix = DataMatrix2DContainer(
            audio_data,
            time_resolution=1 / float(data.fs))
        container = self.sequencer.sequence(data=matrix, **kwargs)

        if store_processing_chain:
            # Get processing chain item and record the parameters used for
            # this call, then push it onto the container's chain.
            chain_item = self.get_processing_chain_item()
            chain_item.update({'process_parameters': kwargs})
            container.processing_chain.push_processor(**chain_item)

        return container
Example #3
0
    def process(self, data=None, store_processing_chain=False, **kwargs):
        """Process the data with processing chain

        Parameters
        ----------
        data : DataContainer
            Data. If None, the first chain item loads it based on its
            declared input type (data container or data repository).

        store_processing_chain : bool
            Store processing chain to data container returned
            Default value False

        Returns
        -------
        data : DataContainer
            Processed data

        """

        for step_id, step in enumerate(self):
            # Loop through steps in the processing chain
            if not isinstance(step, ProcessingChainItem):
                continue

            if step_id == 0 and data is None:
                # Inject data for the first item in the chain

                if step.processor_class.input_type == ProcessingChainItemType.DATA_CONTAINER:
                    from dcase_util.containers import DataMatrix2DContainer
                    data = DataMatrix2DContainer(**kwargs).load()

                elif step.processor_class.input_type == ProcessingChainItemType.DATA_REPOSITORY:
                    from dcase_util.containers import DataRepository
                    data = DataRepository(**kwargs).load()

            if 'preprocessing_callbacks' in step and isinstance(
                    step['preprocessing_callbacks'], list):
                # Handle pre-processing callbacks assigned to current processor

                for method in step['preprocessing_callbacks']:
                    if isinstance(method, dict):
                        method_name = method.get('method_name')

                        # Default missing parameters to an empty dict so the
                        # call below never does **None.
                        method_parameters = method.get('parameters') or {}

                        # Guard against a missing method name:
                        # hasattr(obj, None) raises TypeError.
                        if method_name and hasattr(step.processor_class, method_name):
                            getattr(step.processor_class,
                                    method_name)(**method_parameters)

            if hasattr(step.processor_class, 'process'):
                # Call process method of the processor if it exists

                # Copy the stored parameters before merging so current call
                # kwargs do not leak into (mutate) the chain item itself.
                process_parameters = dict(step.get('process_parameters', {}))

                # Update parameters with current parameters given
                process_parameters.update(kwargs)

                # Do actual processing
                data = step.processor_class.process(
                    data=data,
                    store_processing_chain=store_processing_chain,
                    **process_parameters)

        return data
Example #4
0
    def as_matrix(self,
                  label_list=None,
                  filename=None,
                  file_list=None,
                  default_value=0):
        """Get probabilities as data matrix.
        If items has index defined, index is used to order columns.
        If items has filename, filename is used to order columns.

        Parameters
        ----------
        label_list : list of str
            List of labels. If none given, labels in the container are used in alphabetical order.
            Default value None

        filename : str
            Filename to filter content. If none given, one given for class constructor is used.
            Default value None

        file_list : list of str
            List of filenames to included in the matrix.
            Default value None

        default_value : numerical
            Default value of the element in the matrix. Used in case there is no data for the element in the container.
            Default value 0

        Returns
        -------
        DataMatrix2DContainer or None
            None is returned when the filtered data has neither indices nor
            filenames to order columns by.

        """

        data = self.filter(filename=filename, file_list=file_list)

        if label_list is None:
            label_list = data.unique_labels

        indices = data.unique_indices

        if file_list is None:
            file_list = data.unique_files

        # Build (column_id, items) pairs; the two ordering modes only differ
        # in how columns are selected and numbered.
        if indices:
            # Item index is used directly as the column position.
            column_specs = [(index, data.filter(index=index))
                            for index in indices]
            column_count = len(indices)

        elif file_list:
            # Columns follow the order of file_list. Note: loop variable is
            # deliberately not named 'filename' to avoid shadowing the
            # parameter.
            column_specs = [(file_id, data.filter(filename=current_file))
                            for file_id, current_file in enumerate(file_list)]
            column_count = len(file_list)

        else:
            # No way to order columns; preserve original implicit behaviour
            # but make it explicit.
            return None

        # Initialize matrix with the default value, then fill in known
        # probabilities.
        matrix = numpy.ones((len(label_list), column_count)) * default_value
        for column_id, column_items in column_specs:
            for item in column_items:
                if item.label in label_list:
                    matrix[label_list.index(item.label),
                           column_id] = item.probability

        from dcase_util.containers import DataMatrix2DContainer
        return DataMatrix2DContainer(data=matrix)
Example #5
0
    def sequence(self, data, shift=None, **kwargs):
        """Convert 2D data matrix into sequence of specified length 2D matrices

        Parameters
        ----------
        data : DataContainer or numpy.ndarray
            Data

        shift : int
            Sequencing grid shift in frames. If none given, one given for class initializer is used.
            Value is kept inside data size. Parameter value is stored as new class stored value.
            Default value None

        Returns
        -------
        DataMatrix3DContainer

        Raises
        ------
        ValueError
            If shift border handling mode or data container type is unknown.

        IOError
            If no valid segment can be created with current settings.

        """

        # NOTE(review): truthiness test means shift=0 cannot reset a
        # previously stored non-zero shift — confirm this is intended.
        if shift:
            self.shift = shift

        from dcase_util.containers import DataContainer, DataMatrix2DContainer, DataMatrix3DContainer
        # Make copy of the data to prevent modifications to the original data
        data = copy.deepcopy(data)

        # Wrap a plain 2D numpy array into a container so the container API
        # (length, time_axis, get_frames) below can be used uniformly.
        if isinstance(data, numpy.ndarray):
            if len(data.shape) == 2:
                data = DataMatrix2DContainer(data)

        if isinstance(data, DataContainer):
            # Make sure shift index is withing data
            self.shift = self.shift % data.length

            # Not the most efficient way as numpy stride_tricks would produce
            # faster code, however, opted for cleaner presentation this time.
            processed_data = []

            if self.shift_border == 'shift':
                # 'shift' mode: move the segmentation grid start forward by
                # self.shift frames; data itself is untouched.
                segment_indexes = numpy.arange(self.shift, data.length,
                                               self.hop_length)

            elif self.shift_border == 'roll':
                # 'roll' mode: keep the grid at zero and rotate the data
                # along the time axis instead.
                segment_indexes = numpy.arange(0, data.length, self.hop_length)

                if self.shift != 0:
                    # Roll data
                    data.data = numpy.roll(data.data,
                                           shift=-self.shift,
                                           axis=data.time_axis)

            else:
                message = '{name}: Unknown type for sequence border handling when doing temporal shifting ' \
                          '[{shift_border}].'.format(
                    name=self.__class__.__name__,
                    shift_border=self.shift_border
                )

                self.logger.exception(message)
                raise ValueError(message)

            # NOTE(review): self.padding is used both as an on/off flag here
            # and as a mode string ('repeat'/'zero') further below — confirm
            # the attribute's allowed values.
            if self.padding:
                if len(segment_indexes) == 0:
                    # Have at least one segment
                    segment_indexes = numpy.array([0])

            else:
                # Remove segments which are not full
                segment_indexes = segment_indexes[(
                    segment_indexes + self.sequence_length - 1) < data.length]

            for segment_start_frame in segment_indexes:
                segment_end_frame = segment_start_frame + self.sequence_length

                frame_ids = numpy.array(
                    range(segment_start_frame, segment_end_frame))

                # Positions of frame ids that actually fall inside the data.
                valid_frames = numpy.where(
                    numpy.logical_and(frame_ids >= 0,
                                      frame_ids < data.length))[0]

                if len(valid_frames) / float(
                        self.sequence_length
                ) > self.required_data_amount_per_segment:
                    # Process segment only if it has minimum about of valid frames

                    if self.padding == 'repeat':
                        # Handle boundaries with repeated boundary vectors

                        # If start of matrix, pad with first frame
                        frame_ids[frame_ids < 0] = 0

                        # If end of the matrix, pad with last frame
                        frame_ids[frame_ids > data.length -
                                  1] = data.length - 1

                        # Append the segment
                        processed_data.append(
                            data.get_frames(frame_ids=frame_ids))

                    elif self.padding == 'zero':
                        # Handle boundaries with zero padding

                        # Initialize current segment with zero content
                        current_segment = numpy.zeros(
                            (data.vector_length, self.sequence_length))

                        # Copy data into correct position within the segment
                        current_segment[:, valid_frames] = data.get_frames(
                            frame_ids=frame_ids[valid_frames])

                        # Append the segment
                        processed_data.append(current_segment)

                    else:
                        # No padding mode: segment is guaranteed full because
                        # partial segments were filtered out above.
                        # Append the segment
                        processed_data.append(
                            data.get_frames(frame_ids=frame_ids))

            if len(processed_data) == 0:
                message = '{name}: Cannot create valid segment, adjust segment length and hop size, or use ' \
                          'padding flag. (Data length was {length})'.format(
                    name=self.__class__.__name__,
                    length=data.length
                )

                self.logger.exception(message)
                raise IOError(message)

            # Stack segments and move the segment axis last:
            # (segments, vectors, frames) -> (vectors, frames, segments).
            # NOTE(review): time_resolution is intentionally dropped here —
            # confirm downstream consumers do not rely on it.
            return DataMatrix3DContainer(
                data=numpy.moveaxis(numpy.array(processed_data), 0, 2),
                time_resolution=None,
                processing_chain=data.processing_chain)

        else:
            message = '{name}: Unknown data container type.'.format(
                name=self.__class__.__name__, )
            self.logger.exception(message)
            raise ValueError(message)
Example #6
0
    def process(self,
                data=None,
                label=None,
                focus_field=None,
                length_frames=None,
                length_seconds=None,
                store_processing_chain=False,
                **kwargs):
        """Encode metadata

        Parameters
        ----------
        data : MetaDataContainer
            Meta data to encode. Give data in either through meta data container or directly with label parameter.

        label : str
            Class label to be hot

        focus_field : str
            Field from the meta data item to be used in encoding. If None, one given as parameter for class
            constructor is used.

        length_frames : int
            Length of encoded segment in frames. If None, one given as parameter for class constructor is used.

        length_seconds : float > 0.0
            Length of encoded segment in seconds. If None, one given as parameter for class constructor is used.

        store_processing_chain : bool
            Store processing chain to data container returned
            Default value False

        Returns
        -------
        DataMatrix2DContainer

        Raises
        ------
        ValueError
            If neither data nor label is given, or data has the wrong type.

        """

        # At least one source of the label must be supplied.
        if data is None and label is None:
            message = '{name}: Give data or label parameter.'.format(
                name=self.__class__.__name__)
            self.logger.exception(message)
            raise ValueError(message)

        from dcase_util.containers import MetaDataContainer

        # When data is given it must be a MetaDataContainer.
        if data is not None and not isinstance(data, MetaDataContainer):
            message = '{name}: Wrong input data type, type required [{input_type}].'.format(
                name=self.__class__.__name__, input_type=self.input_type)

            self.logger.exception(message)
            raise ValueError(message)

        # Fall back to the field configured at construction time.
        focus_field = self.focus_field if focus_field is None else focus_field

        # Pull the label from the first meta data item when not given directly.
        if label is None and data is not None and len(data) > 0:
            label = data[0].get(focus_field)

        # Do processing
        self.encoder.encode(label=label,
                            length_frames=length_frames,
                            length_seconds=length_seconds)

        processing_chain = None
        if store_processing_chain:
            # Get processing chain item and record the parameters used.
            chain_item = self.get_processing_chain_item()

            if 'process_parameters' not in chain_item:
                chain_item['process_parameters'] = {}

            chain_item['process_parameters']['focus_field'] = focus_field
            chain_item['process_parameters']['length_frames'] = length_frames

            # Reuse the chain carried by the data when available; otherwise
            # start a fresh one with this item.
            if hasattr(data, 'processing_chain'):
                data.processing_chain.push_processor(**chain_item)
                processing_chain = data.processing_chain
            else:
                processing_chain = ProcessingChain().push_processor(
                    **chain_item)

        from dcase_util.containers import DataMatrix2DContainer
        return DataMatrix2DContainer(
            data=self.encoder.data,
            label_list=self.encoder.label_list,
            time_resolution=self.encoder.time_resolution,
            processing_chain=processing_chain)