Example #1
    def run(self, *, parent: FileAdapter, **kwargs) -> FileAdapter:
        output_file = self.default_outputs(parent=parent, **kwargs)
        soft_fail = False
        journal_family = self.meta.metadata_journal_family

        with pd.HDFStore(output_file.file, 'w') as output_store:
            for name, value in kwargs.items():
                # Check whether the input task succeeded; if it did not,
                # do not merge its contents into the results
                file_status = value.metadata.get(journal_family,
                                                 {}).get('status', None)
                if self.verify_status and file_status != 'SUCCESS':
                    logger.warning(
                        'Input %s did not have a success status: %s. '
                        'Ignoring this file on the HDF5 group merge', name,
                        file_status)
                    soft_fail = True
                    continue

                if value.empty:
                    logger.warning('Input %s is empty. Ignoring this file on '
                                   'the HDF5 group merge', name)
                    continue

                with pd.HDFStore(value.file, 'r') as input_store:
                    for g, input_node in input_store.items():
                        # Copy the HDF5 data
                        dataframe = pd.read_hdf(input_store, key=g)
                        # Protect against HDF5 nodes that store something
                        # other than a DataFrame
                        assert isinstance(dataframe, pd.DataFrame)
                        dataframe.to_hdf(output_store, key=g)

        # Set the hdf5 group metadata
        if self.hdf5_family:
            self.logger.debug(
                'Automatically detecting HDF5 groups that meet the standard...')
            output_file.metadata.setdefault(self.hdf5_family, {})
            output_file.metadata[self.hdf5_family] = infer_standard_groups(
                output_file.file_str)

        # make a copy of parent.metadata
        parent_metadata = copy.deepcopy(parent.metadata)
        # Propagate metadata
        for k in self.propagate_families:
            parent_meta = parent_metadata.get(k, {})
            parent_meta.pop('id', None)
            output_file.metadata.setdefault(k, {}).update(parent_meta)

        # Handle status with a partial result
        # This is a bit hacky because this use-case was never anticipated:
        # a soft fail that carries these outputs; normally, a soft fail
        # returns the default outputs
        if self.verify_status and soft_fail:
            raise GracefulFailWithResults(
                output_file, 'One of the inputs was generated by '
                'a failed or soft-failed task')

        return output_file
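
The heart of this example is the group-by-group HDF5 copy. A minimal standalone sketch of that pattern, using only pandas (the function name and file names are hypothetical):

    import pandas as pd

    def merge_hdf5_stores(input_paths, output_path):
        # Copy every DataFrame group from each input store into one output store
        with pd.HDFStore(output_path, 'w') as output_store:
            for path in input_paths:
                with pd.HDFStore(path, 'r') as input_store:
                    for key, _node in input_store.items():
                        dataframe = pd.read_hdf(input_store, key=key)
                        if not isinstance(dataframe, pd.DataFrame):
                            continue  # skip nodes that store something else
                        # to_hdf overwrites an existing key by default, so
                        # later inputs win on key collisions
                        dataframe.to_hdf(output_store, key=key)

    # Hypothetical usage
    merge_hdf5_stores(['input_a.h5', 'input_b.h5'], 'merged.h5')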
Example #2
    def run(self, signals: pd.DataFrame, events: pd.DataFrame,
            parent: FileAdapter) -> FileAdapter:
        if signals.empty:
            raise SoftPreconditionFailed('Input signals are empty')
        if events.empty:
            raise SoftPreconditionFailed('Input events are empty')

        output_file = self.default_outputs()

        self.logger.info(
            'Respiration preprocessing for signal=%s, events=%s -> %s',
            signals, events, output_file)

        # extract sequential features
        try:
            features = respiration_sequence_features(signals, events)
        except NoRespirationPeaks:
            # Fail gracefully: no detectable peaks or troughs means the
            # signal quality is too poor to extract features
            raise GracefulFailWithResults(
                'Could not find peaks/troughs in the PZT signal, '
                'which reflects a bad signal.')

        if not features.empty:
            features.loc[:, 'file_id'] = parent.id

        with pd.HDFStore(output_file.file, 'w') as store:
            features.to_hdf(store, self.output_hdf5_key)
        deep_update(output_file.metadata,
                    {'standard': infer_standard_groups(output_file.file_str)})
        return output_file
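
This persistence step (tag the non-empty feature table with the id of its source file, then write it under a single HDF5 key) recurs verbatim in the next two examples. A self-contained sketch of just that step, with hypothetical names (persist_features, the file id, path and key):

    import pandas as pd

    def persist_features(features: pd.DataFrame, file_id: str,
                         output_path: str, hdf5_key: str) -> None:
        # Tag each row with its source file so merged tables stay traceable
        if not features.empty:
            features.loc[:, 'file_id'] = file_id
        with pd.HDFStore(output_path, 'w') as store:
            features.to_hdf(store, key=hdf5_key)

    # Hypothetical usage
    table = pd.DataFrame({'rate_mean': [12.5], 'rate_std': [0.8]})
    persist_features(table, file_id='abc123',
                     output_path='features.h5', hdf5_key='/features/pzt')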
Example #3
    def run(self, events: pd.DataFrame, parent: FileAdapter) -> FileAdapter:
        if events.empty:
            raise SoftPreconditionFailed('Input events are empty')

        output_file = self.default_outputs()

        self.logger.info('Behavior feature extraction for events=%s -> %s',
                         events, output_file)
        features = extract_space_stress_features(events)
        if not features.empty:
            features.loc[:, 'file_id'] = parent.id

        with pd.HDFStore(output_file.file, 'w') as store:
            features.to_hdf(store, self.output_hdf5_key)
        deep_update(output_file.metadata,
                    {'standard': infer_standard_groups(output_file.file_str)})
        return output_file
Example #4
    def run(self, features: pd.DataFrame, parent: FileAdapter) -> FileAdapter:

        if features.empty:
            raise SoftPreconditionFailed('Input features are empty')

        output_file = self.default_outputs()
        features = extract_meta_features(features, config=meta_survey_config)
        if not features.empty:
            features.loc[:, 'file_id'] = parent.id
        self.logger.debug('Obtained %d survey/meta features',
                          features.shape[0])

        with pd.HDFStore(output_file.file, 'w') as store:
            features.to_hdf(store, self.output_hdf5_key)
        deep_update(output_file.metadata,
                    {'standard': infer_standard_groups(output_file.file_str)})
        return output_file
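
The deep_update helper used in the three examples above merges a nested dictionary into the output metadata in place. Its implementation is not shown here; a plausible minimal version, assuming it behaves like a recursive dict merge:

    def deep_update(target: dict, updates: dict) -> dict:
        # Recursively merge `updates` into `target`, in place; nested dicts
        # are merged key by key, anything else is overwritten
        for key, value in updates.items():
            if isinstance(value, dict) and isinstance(target.get(key), dict):
                deep_update(target[key], value)
            else:
                target[key] = value
        return target

    # Hypothetical usage
    meta = {'standard': {'groups': ['/a']}, 'id': 1}
    deep_update(meta, {'standard': {'valid': True}})
    # meta == {'standard': {'groups': ['/a'], 'valid': True}, 'id': 1}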
Example #5
    def run(
        self,
        *,
        cvx: pd.DataFrame,
        scrpeaks: pd.DataFrame,
        events: Optional[pd.DataFrame] = None,
        parent: FileAdapter
    ) -> FileAdapter:  # TODO: events should be named sequences?

        if cvx.empty:
            raise SoftPreconditionFailed('Input cvx signals are empty')
        if scrpeaks.empty:
            raise SoftPreconditionFailed('Input scrpeaks signals are empty')
        if events is None or events.empty:
            raise SoftPreconditionFailed('Input events are empty')

        output = self.default_outputs()
        # Regexp to drop intro/outro/lobby sequences: the intro is a warm-up
        # and the lobbies are too short for GSR feature extraction
        blacklist = re.compile('.*(intro|outro|lobby).*')
        known_sequences = [
            sequence for sequence in VALID_SEQUENCE_KEYS
            if not blacklist.match(sequence)
        ]
        features = gsr_features(cvx,
                                scrpeaks,
                                events,
                                known_sequences=known_sequences)
        if not features.empty:
            features.loc[:, 'file_id'] = parent.id

        store_output(output.file,
                     self.output_hdf5_key,
                     dataframe=features,
                     annotations=None)
        output.metadata['standard'] = infer_standard_groups(output.file_str)
        return output
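
A detail worth noting in the blacklist above: re.match anchors at the start of the string, so the leading '.*' is what makes the pattern behave like a substring search. A small self-contained check (the key list is a made-up stand-in for VALID_SEQUENCE_KEYS):

    import re

    # Made-up stand-in for the real VALID_SEQUENCE_KEYS constant
    VALID_SEQUENCE_KEYS = [
        'space-stress_game_0',
        'lobby_0',
        'physio-sonification_intro_0',
    ]

    blacklist = re.compile('.*(intro|outro|lobby).*')
    known_sequences = [k for k in VALID_SEQUENCE_KEYS if not blacklist.match(k)]
    print(known_sequences)  # -> ['space-stress_game_0']

    # re.search without the leading '.*' filters the same keys
    assert known_sequences == [
        k for k in VALID_SEQUENCE_KEYS if not re.search('intro|outro|lobby', k)
    ]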