Example #1
    def run(self, *, signals: pd.DataFrame) -> FileAdapter:

        if signals.empty:
            raise SoftPreconditionFailed('Input signals are empty')
        if 'PPG' not in signals.columns:
            raise SoftPreconditionFailed(
                'Input signals do not have a PPG column')

        output_file = self.default_outputs()
        fs = int(estimate_rate(signals))
        bands = (0.5, 11)
        self.logger.info(
            'Band-pass filtering signal between %.2f -- %.2f Hz '
            'with a FIR filter of order %d', *bands, fs)

        filtered = filtfilt_signal(
            signals,
            order=fs,
            frequencies=bands,
            filter_type='bandpass',
            filter_design='fir',
        )
        scaled = scale_signal(filtered, method='robust')

        self.logger.info('Cleaned PPG signal, input shape %s, output shape %s',
                         signals.shape, scaled.shape)

        with pd.HDFStore(output_file.file, 'w') as store:
            scaled.to_hdf(store, self.output_hdf5_key)

        return output_file
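
For reference, a minimal sketch of what the two steps above could look like
with plain scipy/pandas, assuming filtfilt_signal applies a zero-phase FIR
band-pass and scale_signal's 'robust' method centers by the median and scales
by the inter-quartile range (the actual dsu implementations may differ):

import pandas as pd
from scipy import signal

def bandpass_zero_phase(df: pd.DataFrame, fs: int,
                        low: float, high: float) -> pd.DataFrame:
    # FIR taps for a band-pass between low and high Hz; fs + 1 taps keeps
    # the filter order at fs, mirroring the order=fs argument above
    taps = signal.firwin(fs + 1, [low, high], pass_zero=False, fs=fs)
    # filtfilt runs the filter forward and backward: zero phase distortion
    filtered = signal.filtfilt(taps, [1.0], df.to_numpy(), axis=0)
    return pd.DataFrame(filtered, index=df.index, columns=df.columns)

def robust_scale(df: pd.DataFrame) -> pd.DataFrame:
    # robust scaling: center by the median, scale by the inter-quartile
    # range, so outliers barely influence the result
    iqr = df.quantile(0.75) - df.quantile(0.25)
    return (df - df.median()) / iqr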
Example #2
    def run(self, *, signals: pd.DataFrame, annotations: pd.DataFrame,
            events: pd.DataFrame) -> FileAdapter:
        if signals.empty:
            raise SoftPreconditionFailed('Input signals are empty')
        if events.empty:
            raise SoftPreconditionFailed('Input events are empty')

        output = self.default_outputs()

        self.logger.info(
            'Galvanic preprocessing for signal=%s, events=%s -> %s', signals,
            events, output)
        clean, clean_annotations = galvanic_clean(
            signals=signals,
            events=events,
            annotations=annotations,
            column=self.column,
            warmup_duration=self.warmup_duration,
            corrupted_maxratio=self.corrupted_maxratio,
            interpolation_kwargs=self.interpolation_kwargs,
            filter_kwargs=self.filter_kwargs,
            scaling_kwargs=self.scaling_kwargs)
        # todo: keep only last row?
        store_output(output.file,
                     self.output_hdf5_key,
                     dataframe=clean,
                     annotations=clean_annotations)
        return output
Example #3
    def run(self, signals: pd.DataFrame, events: pd.DataFrame,
            parent: FileAdapter) -> FileAdapter:
        if signals.empty:
            raise SoftPreconditionFailed('Input signals are empty')
        if events.empty:
            raise SoftPreconditionFailed('Input events are empty')

        output_file = self.default_outputs()

        self.logger.info(
            'Respiration preprocessing for signal=%s, events=%s -> %s',
            signals, events, output_file)

        # extract sequential features
        try:
            features = respiration_sequence_features(signals, events)
        except NoRespirationPeaks:
            # fail gracefully with an empty features dataframe as the result
            raise GracefulFailWithResults(
                'Could not find peaks/troughs in the PZT signal, '
                'which reflects a bad signal.')

        if not features.empty:
            features.loc[:, 'file_id'] = parent.id

        with pd.HDFStore(output_file.file, 'w') as store:
            features.to_hdf(store, self.output_hdf5_key)
        deep_update(output_file.metadata,
                    {'standard': infer_standard_groups(output_file.file_str)})
        return output_file
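
The pattern above turns a domain error (NoRespirationPeaks) into a graceful
failure whose default, empty outputs remain consumable downstream. A minimal
sketch of how such an exception hierarchy could be organized; only the
exception names used in these examples are grounded, their exact base classes
are assumptions:

class PreconditionFailed(Exception):
    """The task cannot run, but its default outputs stay usable downstream."""

class SoftPreconditionFailed(PreconditionFailed):
    """A recoverable input problem, e.g. an empty input dataframe."""

class GracefulFailWithResults(Exception):
    """The task ran, detected an unusable signal, and reports empty results."""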
Example #4
    def run(self, signals: pd.DataFrame, events: pd.DataFrame) -> FileAdapter:

        if events.empty:
            raise SoftPreconditionFailed('Input events are empty')

        output_file = self.default_outputs()

        self.logger.info(
            'Respiration preprocessing for signal=%s, events=%s -> %s',
            signals, events, output_file)

        # truncate signals between first and last events
        begins, ends = events.index[0], events.index[-1]
        signals = signals[begins:ends]

        if len(signals) < 2:
            raise SoftPreconditionFailed(
                'Input signals have fewer than 2 samples after truncation')

        # clean signals
        clean = respiration_clean(signals)

        # todo: find some file examples where signal is bad

        with pd.HDFStore(output_file.file, 'w') as store:
            clean.to_hdf(store, self.output_hdf5_key)
        return output_file
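
Note that signals[begins:ends] uses pandas label-based slicing, which is
inclusive of both endpoints, so the samples at the first and the last event
timestamps are kept. A tiny illustration with made-up timestamps:

import pandas as pd

idx = pd.date_range('2019-01-01', periods=5, freq='1s')
signals = pd.DataFrame({'PZT': range(5)}, index=idx)
print(signals[idx[1]:idx[3]])  # rows 1, 2 and 3: both ends included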
Example #5
    def run(self, *, signals: pd.DataFrame) -> FileAdapter:
        if signals.empty:
            raise SoftPreconditionFailed('Input signals are empty')
        if self.column not in signals.columns:
            raise SoftPreconditionFailed(
                f'Input signals do not have a "{self.column}" column')

        output_file = self.default_outputs()

        # Step 1: calculate SSF
        fs = estimate_rate(signals)
        window_samples = int(self.window_fraction * fs)
        ppg = signals[self.column]
        ppg_ssf = ssf(ppg, win=window_samples)
        df_ssf = pd.DataFrame({'PPG_SSF': ppg_ssf}, index=signals.index)
        self.logger.info(
            'Calculated SSF signal, input shape %s, output shape %s',
            signals.shape, ppg_ssf.shape)

        # Step 2: detect peak with adaptive threshold
        peaks, thresh = detect_ssf_peaks(df_ssf.PPG_SSF,
                                         threshold_percentage=0.50)

        # Step 3: convert to PP intervals and post-process them
        df_interval = peak_to_nn(peaks).rename(columns={'interval': 'NN'})

        # Step 4: interpolate NN
        df_interpolated = nn_interpolation(df_interval, fs=fs, column='NN')

        with pd.HDFStore(output_file.file, 'w') as store:
            df_ssf.to_hdf(store, self.ssf_output_hdf5_key)
            df_interval.to_hdf(store, self.ssf_nn_output_hdf5_key)
            df_interpolated.to_hdf(store, self.ssf_nni_output_hdf5_key)

        return output_file
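
Step 1 relies on the slope sum function (SSF), which sharpens the rising edge
of each PPG pulse before peak detection. A minimal numpy sketch, assuming
ssf() follows the classic formulation of summing positive slopes over a
moving window (the actual dsu implementation may differ):

import numpy as np

def ssf_sketch(x: np.ndarray, win: int) -> np.ndarray:
    # first difference; prepend keeps the output aligned with the input
    dx = np.diff(x, prepend=x[0])
    # keep only rising slopes, ignore falling ones
    du = np.clip(dx, 0.0, None)
    # causal moving sum of the positive slopes over `win` samples
    return np.convolve(du, np.ones(win), mode='full')[:len(x)]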
Example #6
    def run(
        self,
        *,
        cvx: pd.DataFrame,
        scrpeaks: pd.DataFrame,
        events: Optional[pd.DataFrame] = None,
        parent: FileAdapter
    ) -> FileAdapter:  # TODO: events should be named sequences?

        if cvx.empty:
            raise SoftPreconditionFailed('Input cvx signals are empty')
        if scrpeaks.empty:
            raise SoftPreconditionFailed('Input scrpeaks signals are empty')
        if events is None or events.empty:
            raise SoftPreconditionFailed('Input events are empty')

        output = self.default_outputs()
        # Regexp to exclude sequences that are unsuitable for GSR features:
        # lobbies are too short, and the intro is a warm-up period
        blacklist = re.compile('.*(intro|outro|lobby).*')
        known_sequences = [
            sequence for sequence in VALID_SEQUENCE_KEYS
            if not blacklist.match(sequence)
        ]
        features = gsr_features(cvx,
                                scrpeaks,
                                events,
                                known_sequences=known_sequences)
        if not features.empty:
            features.loc[:, 'file_id'] = parent.id

        store_output(output.file,
                     self.output_hdf5_key,
                     dataframe=features,
                     annotations=None)
        output.metadata['standard'] = infer_standard_groups(output.file_str)
        return output
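
A quick illustration of the blacklist filter; the sequence names below are
hypothetical stand-ins for VALID_SEQUENCE_KEYS:

import re

blacklist = re.compile('.*(intro|outro|lobby).*')
keys = ['session_intro', 'space-stress_game', 'cardiac-coherence', 'lobby_wait']
print([k for k in keys if not blacklist.match(k)])
# ['space-stress_game', 'cardiac-coherence']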
Example #7
    def run(self, *, signal: pd.DataFrame) -> FileAdapter:
        output_file = self.default_outputs()

        if self.column not in signal:
            raise SoftPreconditionFailed(f'Input dataframe does not have column "{self.column}"')
        x = signal[self.column]
        fs = int(estimate_rate(x))

        properties, _ = extract_all_peaks(x, window_size=fs)

        with pd.HDFStore(output_file.file, 'w') as store:
            properties.to_hdf(store, self.output_hdf5_key)

        return output_file
Example #8
    def run(self, signals: pd.DataFrame) -> FileAdapter:
        """Extract and pre-process signals"""
        logger.info('Extracting Nexus signal %s -> %s on file %s',
                    self.source_column, self.target_column,
                    prefect.context.run_kwargs['signals'])

        raw = (
            signals[[self.source_column]]
            .rename(columns={self.source_column: self.target_column})
        )

        # Estimate the sampling frequency: signals with heavy jitter will
        # fail early here and raise a DSUException. See issue #44
        try:
            fs = estimate_rate(raw)
        except DSUException as ex:
            logger.warning('Failed to estimate rate: %s, raising a precondition fail', ex)
            raise SoftPreconditionFailed(str(ex)) from ex

        logger.debug('Uniform resampling from %.3f Hz to %d Hz', fs, self.sampling_rate)
        # Uniform sampling, with linear interpolation.
        # sample-and-hold is not a good strategy, see issue 48:
        # https://github.com/OpenMindInnovation/iguazu/issues/48
        raw_uniform = uniform_sampling(raw, self.sampling_rate,
                                       interpolation_kind='linear')

        # Create the annotations companion dataframe and mark any NaN as an
        # "unknown" problem, since it must come from the device / driver.
        # A sparse representation was considered but rejected: it complicates
        # the code and saves very little space.
        raw_annotations = raw_uniform.isna().replace({True: 'unknown', False: ''})

        n_samples = raw_uniform.shape[0]
        n_nans = int((raw_annotations != '').sum().sum())
        logger.debug('Finished standardization of Nexus signal %s -> %s. '
                     'Result has %d samples (%.1f seconds, %.1f minutes) '
                     '%d samples are NaN (%.1f %%).',
                     self.source_column, self.target_column,
                     n_samples,
                     n_samples / self.sampling_rate,
                     n_samples / self.sampling_rate / 60,
                     n_nans,
                     100 * n_nans / n_samples)
        if n_samples > 0:
            logger.debug('Extract of result:\n%s',
                         raw_uniform.to_string(max_rows=5))

        return self.save(raw_uniform, raw_annotations)
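
A minimal pandas sketch of what uniform resampling with linear interpolation
could look like; uniform_sampling comes from dsu and its real implementation
may differ:

import pandas as pd

def uniform_sampling_sketch(df: pd.DataFrame, fs: int) -> pd.DataFrame:
    period = pd.Timedelta(seconds=1 / fs)
    target = pd.date_range(df.index[0], df.index[-1], freq=period)
    # union the two grids, interpolate in time, then keep only the uniform
    # grid; linear interpolation avoids the staircase bias of sample-and-hold
    return (df.reindex(df.index.union(target))
              .interpolate(method='time')
              .reindex(target))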
Example #9
    def run(self, *, signals: pd.DataFrame,
            annotations: pd.DataFrame) -> FileAdapter:
        if signals.empty:
            raise SoftPreconditionFailed('Input signals are empty')

        downsampled = downsample(signals, self.sampling_rate)
        downsampled_annotations = annotations.loc[downsampled.index, :]

        output = self.default_outputs()

        store_output(output.file,
                     self.output_hdf5_key,
                     dataframe=downsampled,
                     annotations=downsampled_annotations)
        return output
Example #10
    def run(self, events: pd.DataFrame) -> FileAdapter:

        if events.empty:
            raise SoftPreconditionFailed('Input events are empty')

        output_file = self.default_outputs()
        dataframe = extract_report_features(events)
        self.logger.debug('Obtained %d survey/report features',
                          dataframe.shape[0])
        if not dataframe.empty:
            self.logger.debug('Small extract of survey/report:\n%s',
                              dataframe.to_string(max_rows=5))

        with pd.HDFStore(output_file.file, 'w') as store:
            dataframe.to_hdf(store, self.output_hdf5_key)
        return output_file
Example #11
    def run(self, *, signals: pd.DataFrame,
            annotations: pd.DataFrame) -> FileAdapter:
        if signals.empty:
            raise SoftPreconditionFailed('Input signals are empty')
        output = self.default_outputs()
        peaks, peaks_annotations = galvanic_scrpeaks(
            signals,
            annotations,
            column=self.column,
            peaks_kwargs=self.peaks_kwargs,
            max_increase_duration=self.max_increase_duration)
        store_output(output.file,
                     self.output_hdf5_key,
                     dataframe=peaks,
                     annotations=peaks_annotations)
        return output
Example #12
    def run(self, events: pd.DataFrame, parent: FileAdapter) -> FileAdapter:
        if events.empty:
            raise SoftPreconditionFailed('Input events are empty')

        output_file = self.default_outputs()

        self.logger.info('Behavior feature extraction for events=%s -> %s',
                         events, output_file)
        features = extract_space_stress_features(events)
        if not features.empty:
            features.loc[:, 'file_id'] = parent.id

        with pd.HDFStore(output_file.file, 'w') as store:
            features.to_hdf(store, self.output_hdf5_key)
        deep_update(output_file.metadata,
                    {'standard': infer_standard_groups(output_file.file_str)})
        return output_file
Example #13
    def run(self, features: pd.DataFrame, parent: FileAdapter) -> FileAdapter:

        if features.empty:
            raise SoftPreconditionFailed('Input features are empty')

        output_file = self.default_outputs()
        features = extract_meta_features(features, config=meta_survey_config)
        if not features.empty:
            features.loc[:, 'file_id'] = parent.id
        self.logger.debug('Obtained %d survey/meta features',
                          features.shape[0])

        with pd.HDFStore(output_file.file, 'w') as store:
            features.to_hdf(store, self.output_hdf5_key)
        deep_update(output_file.metadata,
                    {'standard': infer_standard_groups(output_file.file_str)})
        return output_file
Example #14
    def preconditions(self, *, events, **inputs):
        """ Verify task preconditions

        Soft preconditions to this task are:

        * Input events follow the :ref:`event_specs`.

        """
        super().preconditions(events=events, **inputs)
        try:
            with pd.HDFStore(events.file, 'r') as store:
                events = pd.read_hdf(store, key=self.input_hdf5_key)
            check_event_specification(events)
        except EventSpecificationError as ex:
            logger.info('VR selection will not run: the input does not '
                        'adhere to standard event specification')
            raise SoftPreconditionFailed('Input did not adhere to standard '
                                         'event specification') from ex
Example #15
    def run(self, *, signals: pd.DataFrame,
            annotations: pd.DataFrame) -> FileAdapter:
        if signals.empty:
            raise SoftPreconditionFailed('Input signals are empty')

        output = self.default_outputs()

        cvx, cvx_annotations = galvanic_cvx(
            signals=signals,
            annotations=annotations,
            column=self.column,
            warmup_duration=self.warmup_duration,
            threshold_scr=self.threshold_scr,
            epoch_size=self.epoch_size,
            epoch_overlap=self.epoch_overlap,
        )

        store_output(output.file,
                     self.output_hdf5_key,
                     dataframe=cvx,
                     annotations=cvx_annotations)
        return output
Example #16
    def run(self, events: pd.DataFrame) -> FileAdapter:
        """Extract and standardize unity events from the VR protocol

        Most of the logic of this task is expressed in the
        :py:func:`extract_standardized_events` function; this method simply
        wraps that logic to make it usable as an Iguazu task.

        Parameters
        ----------
        events
            Dataframe with events in a VR-protocol compatible format.

        Returns
        -------
        A file with the events converted to standard format

        See Also
        --------
        :ref:`standard event specifications <event_specs>`.
        :py:func:`extract_standardized_events`.

        """
        if events.empty:
            raise SoftPreconditionFailed('Input events are empty')

        output_file = self.default_outputs()
        dataframe = extract_standardized_events(events)

        self.logger.debug('Obtained %d events/sequences', dataframe.shape[0])
        if not dataframe.empty:
            self.logger.debug('Small extract of events/sequences:\n%s',
                              dataframe.to_string(max_rows=5))

        with pd.HDFStore(output_file.file, 'w') as store:
            dataframe.to_hdf(store, self.output_hdf5_key)

        return output_file
Example #17
    def preconditions(self, **inputs) -> None:
        """ Check preconditions on the task inputs

        This method performs several automatic verifications on the inputs,
        such as checking whether the task is forced and whether previous
        results already exist.

        You can add your own preconditions by overriding this method, but
        do not forget to call the parent method with
        ``super().preconditions(...)``.

        When overriding this method, raise a
        :py:class:`PreconditionFailed` when an input condition is unmet but
        the default outputs of the task can still be used by downstream
        tasks. This is useful when one wants the flow to continue even if
        this particular task encounters a minor problem.

        If there is a non-recoverable error, or when the default outputs should
        not be used downstream, raise another exception that does not derive
        from :py:class:`PreconditionFailed`.

        Note that the inputs are received without any automatic transformation
        like the file adapter to dataframe transformation.

        Parameters
        ----------
        inputs
            Like ``**kwargs``, these are all the keyword arguments sent to
            the run method of the task

        Raises
        ------
        PreconditionFailed
            When the input does not meet a required condition but the task can
            continue with its default outputs.
        """
        family = self.meta.metadata_journal_family
        # Precondition 1:
        # Previous output does not exist or task is forced
        if not self.forced:
            # Here, it is important to generate the default_outputs only when
            # the task is not forced, because when the task IS forced, then
            # any call to create_file in default_outputs will delete any
            # pre-existing file
            default_output = self.default_outputs(**inputs)
            if isinstance(default_output, FileAdapter):
                default_output_meta = default_output.metadata.get(family, {})
                default_base_meta = default_output.metadata['base']
                if default_base_meta.get('state', None) != 'DELETED' and \
                   default_output_meta.get('status', None) is not None:
                    raise PreviousResultsExist(
                        'Previous results already exist')

        # Precondition 2:
        # Inputs are *not* marked as a failed result from a previous task
        for input_name, input_value in inputs.items():
            if not isinstance(input_value, FileAdapter):
                continue
            input_meta = input_value.metadata.get(family, {})
            if input_meta.get('status', None) == 'FAILURE':
                raise SoftPreconditionFailed(
                    'Previous task failed, generating a file '
                    'with a failed status')
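
For orientation, this is the nested metadata layout that the two preconditions
above inspect. Only the keys actually read by the code are grounded; the
values and the 'iguazu' family name are illustrative assumptions:

metadata = {
    'base': {
        'state': 'READY',      # anything but 'DELETED' counts as existing
    },
    'iguazu': {                # family = self.meta.metadata_journal_family
        'status': 'SUCCESS',   # a previous 'FAILURE' soft-fails this task
    },
}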
Example #18
    def preconditions(self, *, file: FileAdapter, **inputs):
        super().preconditions(file=file, **inputs)
        if file.empty:
            raise SoftPreconditionFailed('Input file was empty')
Example #19
    def preconditions(self, *, signals, **kwargs):
        super().preconditions(signals=signals, **kwargs)

        # Precondition: input signals is not empty
        if signals.empty:
            raise SoftPreconditionFailed('Input signals are empty')