Example no. 1
0
    def _build(self, *, base_url=DEFAULT_BASE_URL, form_id=None, **kwargs):
        """Assemble the typeform response download flow.

        Fetches every response of *form_id* from *base_url*, saves each one,
        attaches protocol and user-hash metadata, then posts a summary report
        to Slack.
        """
        # Task instances -- construction only; DAG wiring happens in `with self`
        fetch_task = FetchResponses(
            base_url=base_url,
            form_id=form_id,
            force=True,  # this task should always run!
        )
        save_task = SaveResponse(form_id=form_id)
        token_task = GetItem(name='GetResponseID', )
        hash_task = GetUserHash()
        protocol_meta_task = AddStaticMetadata(
            new_meta={
                'protocol': {
                    'name': 'vr-questionnaire',
                    'extra': {
                        'form_id': form_id,
                    },
                }
            })
        user_meta_task = AddDynamicMetadata(key=('omind', 'user_hash'), )
        report_task = Report()
        slack_task = SlackTask(
            preamble='Download of typeform responses finished.\nTask report:')

        # Wiring: fetch -> (id, user hash) -> save -> metadata -> report -> slack
        with self:
            all_responses = fetch_task()
            ids = token_task.map(task_result=all_responses,
                                 key=unmapped('response_id'))
            hashes = hash_task.map(response=all_responses)
            saved_files = save_task.map(response=all_responses,
                                        response_id=ids)
            with_protocol = protocol_meta_task.map(file=saved_files)
            with_hash = user_meta_task.map(file=with_protocol, value=hashes)
            summary = report_task(files=with_hash)
            slack_task(message=summary)
Example no. 2
0
    def _build(self, **kwargs):
        """Build the flow that summarizes VR survey features into one CSV.

        An upstream :class:`GenericDatasetFlow` yields the feature HDF5 files;
        each file's ``survey_meta`` dataframe is loaded, all of them are merged
        into a single CSV dataset, and a Slack notification is sent at the end.
        """
        # Work on a private copy so the caller's kwargs are never mutated
        kwargs = kwargs.copy()

        # A Quetzal workspace used by this flow must expose these metadata
        # families (a None version means "use the latest available").
        families = kwargs.get('families', {}) or {}  # default arg may be None
        for family in ('iguazu', 'omind', 'standard', 'protocol'):
            families.setdefault(family, None)
        kwargs['families'] = families

        # Honor an explicit query/dialect from kwargs; otherwise fall back to
        # the class-level default query.
        if not kwargs.get('query', None):
            kwargs['query'] = self.DEFAULT_QUERY
            kwargs['dialect'] = 'postgresql_json'

        # Upstream flow that produces the input files for this flow
        dataset_flow = GenericDatasetFlow(**kwargs)
        self.update(dataset_flow)
        features_files = dataset_flow.terminal_tasks().pop()

        # Classic ETL split:
        #   E   : read each feature dataframe out of its HDF5 file
        #   T+L : merge everything into one dataframe and persist it as CSV
        extract = LoadDataframe(
            key='/iguazu/features/survey_meta',
        )
        merge = MergeDataframes(
            filename='surveys_summary.csv',
            path='datasets',
        )

        slack = SlackTask(message='VR surveys features summarization finished!')

        with self:
            dataframes = extract.map(file=features_files)
            summary = merge(parents=features_files, dataframes=dataframes)
            # Notify on Slack once the merge has completed
            slack(upstream_tasks=[summary])
Example no. 3
0
    def _build(self, **kwargs):
        """Build the flow that merges per-subject typeform features into a CSV.

        Files come from an upstream :class:`GenericDatasetFlow`; the per-subject
        feature dataframes are loaded, merged into one CSV dataset, and a Slack
        notification closes the flow.
        """
        # Required Quetzal metadata families (None means "latest version")
        families = kwargs.get('families', {}) or {}  # default arg may be None
        for family in ('iguazu', 'omind', 'protocol', 'standard'):
            families.setdefault(family, None)
        kwargs['families'] = families

        # Keep a caller-provided query/dialect; otherwise use the default
        if not kwargs.get('query', None):
            kwargs['query'] = self.DEFAULT_QUERY
            kwargs['dialect'] = 'postgresql_json'

        # Stage 1: an upstream dataset flow provides the input files
        dataset_flow = GenericDatasetFlow(**kwargs)
        features_files = dataset_flow.terminal_tasks().pop()
        self.update(dataset_flow)

        load = LoadDataframe(
            key='/iguazu/features/typeform/subject', )
        merge = MergeDataframes(
            filename='typeform_summary.csv',
            path='datasets',
        )
        slack = SlackTask(message='Typeform feature summarization finished!')

        with self:
            dataframes = load.map(file=features_files)
            summary = merge(parents=features_files, dataframes=dataframes)
            # Notify on Slack once the merge has completed
            slack(upstream_tasks=[summary])
Example no. 4
0
    def _build(self, **kwargs):
        """Build the survey feature-extraction flow.

        Extracts survey-report features from standard event files provided by
        an upstream :class:`GenericDatasetFlow`, derives meta features from
        them, propagates metadata, and reports the outcome on Slack.
        """
        # Force required families: a Quetzal workspace must expose these
        # metadata families (None means "latest version").
        families = kwargs.get('families', {}) or {}  # default arg may be None
        for family in ('iguazu', 'omind', 'standard', 'protocol'):
            families.setdefault(family, None)
        kwargs['families'] = families

        # Keep a caller-provided query/dialect; otherwise use the default
        if not kwargs.get('query', None):
            kwargs['query'] = self.DEFAULT_QUERY
            kwargs['dialect'] = 'postgresql_json'

        # An upstream dataset flow provides the input files; this flow is
        # plugged onto its terminal task.
        dataset_flow = GenericDatasetFlow(**kwargs)
        raw_signals = dataset_flow.terminal_tasks().pop()
        events = raw_signals  # events come from the same upstream files
        self.update(dataset_flow)

        flow_meta_create = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)

        # Feature-extraction tasks
        extract_report = ExtractReportFeatures(
            events_hdf5_key='/iguazu/events/standard',
            output_hdf5_key='/iguazu/features/survey_report',
            graceful_exceptions=(NoSurveyReport,
                                 SoftPreconditionFailed)
        )
        extract_meta = ExtractMetaFeatures(
            features_hdf5_key='/iguazu/features/survey_report',
            output_hdf5_key='/iguazu/features/survey_meta'
        )

        propagate = PropagateMetadata(propagate_families=['omind', 'protocol'])

        flow_meta_update = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)
        reporter = Report()

        slack = SlackTask(preamble='Survey feature extraction finished\n'
                                   'Task report:')

        with self:
            created = flow_meta_create.map(parent=events)
            # Feature extraction
            report_features = extract_report.map(events=events,
                                                 upstream_tasks=[created])
            meta_features = extract_meta.map(features=report_features,
                                             parent=raw_signals,
                                             upstream_tasks=[created])

            final_files = propagate.map(parent=raw_signals,
                                        child=meta_features)
            updated = flow_meta_update.map(parent=raw_signals,
                                           child=final_files)
            # Send slack notification
            summary = reporter(files=final_files, upstream_tasks=[updated])
            slack(message=summary)

        logger.debug('Built flow %s with tasks %s', self, self.tasks)
Example no. 5
0
    def _build(self, **kwargs):
        """Build the VR signal/event standardization flow.

        Converts Unity events plus Nexus PPG/GSR/PZT signals into the standard
        iguazu HDF5 layout, merges them into one file per input, and reports
        the outcome on Slack.
        """
        # Required Quetzal metadata families (None means "latest version")
        families = kwargs.get('families', {}) or {}  # default arg may be None
        for family in ('iguazu', 'omind', 'standard', 'protocol'):
            families.setdefault(family, None)
        kwargs['families'] = families

        # Keep a caller-provided query/dialect; otherwise use the default
        if not kwargs.get('query', None):
            kwargs['query'] = self.DEFAULT_QUERY
            kwargs['dialect'] = 'postgresql_json'

        # Stage 1: an upstream dataset flow provides the raw input files
        dataset_flow = GenericDatasetFlow(**kwargs)
        raw_files = dataset_flow.terminal_tasks().pop()
        self.update(dataset_flow)

        flow_meta_create = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)
        events_task = ExtractStandardEvents(
            name='UnityToStandardEvents',
            events_hdf5_key='/unity/events/unity_events',
            output_hdf5_key='/iguazu/events/standard',
        )
        # NOTE(review): the 'signals_hfd5_key' spelling (hfd5, not hdf5) below
        # presumably mirrors the task's own parameter name -- confirm against
        # ExtractNexusSignal/ExtractNexusGSRSignal before "fixing" it.
        ppg_task = ExtractNexusSignal(
            name='NexusToStandardPPG',
            signals_hfd5_key='/nexus/signal/nexus_signal_raw',
            output_hdf5_key='/iguazu/signal/ppg/standard',
            source_column='G',
            target_column='PPG',
        )
        gsr_task = ExtractNexusGSRSignal(
            name='NexusToStandardGSR',
            signals_hfd5_key='/nexus/signal/nexus_signal_raw',
            output_hdf5_key='/iguazu/signal/gsr/standard',
            source_column='F',
            target_column='GSR',
        )
        pzt_task = ExtractNexusSignal(
            name='NexusToStandardPZT',
            signals_hfd5_key='/nexus/signal/nexus_signal_raw',
            output_hdf5_key='/iguazu/signal/pzt/standard',
            source_column='H',
            target_column='PZT',
        )
        merge_task = MergeHDF5(
            suffix='_standard',
            temporary=False,
            verify_status=True,
            hdf5_family='standard',
            meta_keys=['standard'],
            propagate_families=['omind', 'protocol'],
        )
        flow_meta_update = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)
        reporter = Report()
        slack = SlackTask(
            preamble='Standardization of VR flow status finished.\n'
            'Task report:')

        # Build flow
        with self:
            created = flow_meta_create.map(parent=raw_files)
            std_events = events_task.map(events=raw_files,
                                         upstream_tasks=[created])
            std_ppg = ppg_task.map(signals=raw_files,
                                   upstream_tasks=[created])
            std_gsr = gsr_task.map(signals=raw_files,
                                   upstream_tasks=[created])
            std_pzt = pzt_task.map(signals=raw_files,
                                   upstream_tasks=[created])
            merged_files = merge_task.map(
                parent=raw_files,
                events=std_events,
                PPG=std_ppg,
                GSR=std_gsr,
                PZT=std_pzt,
            )
            updated = flow_meta_update.map(parent=raw_files,
                                           child=merged_files)
            summary = reporter(files=merged_files, upstream_tasks=[updated])
            slack(message=summary)

        logger.debug('Built flow %s with tasks %s', self, self.tasks)
Example no. 6
0
    def _build(self, *, plot=False, **kwargs):
        """Build the cardiac (PPG/HRV) feature-extraction flow.

        Cleans the standardized PPG signal, detects peaks (SSF), extracts HRV
        features, propagates metadata, and reports the outcome on Slack.

        NOTE(review): *plot* is accepted but never referenced in this body --
        presumably consumed elsewhere; confirm before removing it.
        """
        # Force required families: a Quetzal workspace must expose these
        # metadata families (None means "latest version").
        families = kwargs.get('families', {}) or {}  # default arg may be None
        for family in ('iguazu', 'omind', 'standard', 'protocol'):
            families.setdefault(family, None)
        kwargs['families'] = families

        # Keep a caller-provided query/dialect; otherwise use the default
        if not kwargs.get('query', None):
            kwargs['query'] = self.DEFAULT_QUERY
            kwargs['dialect'] = 'postgresql_json'

        # An upstream dataset flow provides the input files; this flow is
        # plugged onto its terminal task.
        dataset_flow = GenericDatasetFlow(**kwargs)
        raw_signals = dataset_flow.terminal_tasks().pop()
        events = raw_signals  # events come from the same upstream files
        self.update(dataset_flow)
        flow_meta_create = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)

        # Signal-processing and feature-extraction tasks
        clean_task = CleanPPGSignal(
            signals_hdf5_key='/iguazu/signal/ppg/standard',
            output_hdf5_key='/iguazu/signal/ppg/clean',
        )
        peaks_task = SSFPeakDetect(
            signals_hdf5_key='/iguazu/signal/ppg/clean',
            ssf_output_hdf5_key='/iguazu/signal/ppg/ssf',
            nn_output_hdf5_key='/iguazu/signal/ppg/NN',
            nni_output_hdf5_key='/iguazu/signal/ppg/NNi',
        )
        hrv_task = ExtractHRVFeatures(
            nn_hdf5_key='/iguazu/signal/ppg/NN',
            nni_hdf5_key='/iguazu/signal/ppg/NNi',
            output_hdf5_key='/iguazu/features/ppg/sequence',
        )
        propagate = PropagateMetadata(
            propagate_families=['omind', 'protocol'])

        flow_meta_update = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)
        reporter = Report()
        slack = SlackTask(preamble='Cardiac feature extraction finished.\n'
                          'Task report:')
        with self:
            created = flow_meta_create.map(parent=raw_signals)
            # Signal processing branch
            cleaned = clean_task.map(signals=raw_signals,
                                     upstream_tasks=[created])
            preprocessed = peaks_task.map(signals=cleaned)
            # Feature extraction
            hrv_features = hrv_task.map(nn=preprocessed,
                                        nni=preprocessed,
                                        events=events,
                                        parent=raw_signals)

            final_files = propagate.map(parent=raw_signals,
                                        child=hrv_features)
            updated = flow_meta_update.map(parent=raw_signals,
                                           child=final_files)
            # Send slack notification
            summary = reporter(files=final_files, upstream_tasks=[updated])
            slack(message=summary)

        logger.debug('Built flow %s with tasks %s', self, self.tasks)
Example no. 7
0
    def _build(self, **kwargs):
        """Build the galvanic (GSR) feature-extraction flow.

        Cleans the standardized GSR signal, downsamples it, applies CVX
        decomposition, detects SCR peaks, extracts GSR features, propagates
        metadata, and reports the outcome on Slack.
        """
        # Force required families: a Quetzal workspace must expose these
        # metadata families (None means "latest version").
        families = kwargs.get('families', {}) or {}  # default arg may be None
        for family in ('iguazu', 'omind', 'standard', 'protocol'):
            families.setdefault(family, None)
        kwargs['families'] = families

        # Keep a caller-provided query/dialect; otherwise use the default
        if not kwargs.get('query', None):
            kwargs['query'] = self.DEFAULT_QUERY
            kwargs['dialect'] = 'postgresql_json'

        # An upstream dataset flow provides the input files; this flow is
        # plugged onto its terminal task.
        dataset_flow = GenericDatasetFlow(**kwargs)
        raw_signals = dataset_flow.terminal_tasks().pop()
        events = raw_signals  # events come from the same upstream files
        self.update(dataset_flow)

        flow_meta_create = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)

        # Signal-processing tasks, in pipeline order
        clean_task = CleanGSRSignal(signals_hdf5_key='/iguazu/signal/gsr/standard',
                                    events_hdf5_key='/iguazu/events/standard',
                                    output_hdf5_key='/iguazu/signal/gsr/clean',
                                    graceful_exceptions=(GSRArtifactCorruption,
                                                         SoftPreconditionFailed))
        downsample_task = Downsample(
            signals_hdf5_key='/iguazu/signal/gsr/clean',
            output_hdf5_key='/iguazu/signal/gsr/downsampled',
        )
        cvx_task = ApplyCVX(
            signals_hdf5_key='/iguazu/signal/gsr/downsampled',
            output_hdf5_key='/iguazu/signal/gsr/cvx',
        )
        peaks_task = DetectSCRPeaks(
            signals_hdf5_key='/iguazu/signal/gsr/cvx',
            output_hdf5_key='/iguazu/signal/gsr/scrpeaks',
        )

        features_task = ExtractGSRFeatures(
            cvx_hdf5_key='/iguazu/signal/gsr/cvx',
            scrpeaks_hdf5_key='/iguazu/signal/gsr/scrpeaks',
            events_hdf5_key='/iguazu/events/standard',
            output_hdf5_key='/iguazu/features/gsr/sequence',
        )
        propagate = PropagateMetadata(
            propagate_families=['omind', 'protocol'])

        flow_meta_update = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)
        reporter = Report()
        slack = SlackTask(preamble='Galvanic feature extraction finished.\n'
                          'Task report:')
        with self:
            created = flow_meta_create.map(parent=raw_signals)
            # Signal processing branch
            cleaned = clean_task.map(signals=raw_signals,
                                     annotations=raw_signals,
                                     events=events,
                                     upstream_tasks=[created])
            downsampled = downsample_task.map(signals=cleaned,
                                              annotations=cleaned,
                                              upstream_tasks=[created])
            cvx_signals = cvx_task.map(signals=downsampled,
                                       annotations=downsampled,
                                       upstream_tasks=[created])
            scr_peaks = peaks_task.map(signals=cvx_signals,
                                       annotations=cvx_signals,
                                       upstream_tasks=[created])

            # Feature extraction
            gsr_features = features_task.map(cvx=cvx_signals,
                                             scrpeaks=scr_peaks,
                                             events=events,
                                             parent=raw_signals)
            final_files = propagate.map(parent=raw_signals,
                                        child=gsr_features)
            updated = flow_meta_update.map(
                parent=raw_signals, child=final_files)
            # Send slack notification
            summary = reporter(files=final_files, upstream_tasks=[updated])
            slack(message=summary)

        logger.debug('Built flow %s with tasks %s', self, self.tasks)