def _build(self, **kwargs):
    # Manage parameters
    kwargs = kwargs.copy()

    # Force required families: Quetzal workspace must have the following
    # families: (nb: None means "latest" version)
    required_families = dict(
        iguazu=None,
        omind=None,
        standard=None,
        protocol=None,
    )
    families = kwargs.get('families', {}) or {}  # Could be None by default args
    for name in required_families:
        families.setdefault(name, required_families[name])
    kwargs['families'] = families

    # When the query is set by kwargs, leave the query and dialect as they
    # come. Otherwise, set to the default defined just above
    if not kwargs.get('query', None):
        kwargs['query'] = self.DEFAULT_QUERY
        kwargs['dialect'] = 'postgresql_json'

    # Manage connections to other flows
    dataset_flow = GenericDatasetFlow(**kwargs)
    self.update(dataset_flow)
    features_files = dataset_flow.terminal_tasks().pop()

    # Instantiate tasks. Use separate tasks for a classic ETL approach:
    # E: read features from an HDF5 file
    # T and L: merge features into a single dataframe, then save as CSV
    read_features = LoadDataframe(
        key='/iguazu/features/survey_meta',
    )
    merge_features = MergeDataframes(
        filename='surveys_summary.csv',
        path='datasets',
    )
    notify = SlackTask(message='VR surveys features summarization finished!')

    with self:
        feature_dataframes = read_features.map(file=features_files)
        merged_dataframe = merge_features(parents=features_files,
                                          dataframes=feature_dataframes)
        # Send slack notification
        notify(upstream_tasks=[merged_dataframe])
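# The family/query defaulting above is repeated verbatim in every _build()
# in this module. A minimal sketch of a shared helper that each flow could
# call instead; the name _apply_dataset_defaults is hypothetical and not
# part of the current codebase:
def _apply_dataset_defaults(kwargs, default_query,
                            required_families=('iguazu', 'omind',
                                               'standard', 'protocol')):
    """Fill in the families/query defaults shared by the flows in this module."""
    families = kwargs.get('families', {}) or {}  # Could be None by default args
    for name in required_families:
        families.setdefault(name, None)  # None means "latest" version
    kwargs['families'] = families
    if not kwargs.get('query', None):
        kwargs['query'] = default_query
        kwargs['dialect'] = 'postgresql_json'
    return kwargs
# Each _build() could then start with:
#     kwargs = _apply_dataset_defaults(kwargs.copy(), self.DEFAULT_QUERY)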
def _build(self, *, base_url=DEFAULT_BASE_URL, form_id=None, **kwargs):
    # Force required families: Quetzal workspace must have the following
    # families: (nb: None means "latest" version)
    required_families = dict(
        iguazu=None,
        omind=None,
        protocol=None,
        standard=None,
    )
    families = kwargs.get('families', {}) or {}  # Could be None by default args
    for name in required_families:
        families.setdefault(name, required_families[name])
    kwargs['families'] = families

    # When the query is set by kwargs, leave the query and dialect as they
    # come. Otherwise, set to the default defined just above
    if not kwargs.get('query', None):
        kwargs['query'] = self.DEFAULT_QUERY
        kwargs['dialect'] = 'postgresql_json'

    # First part of this flow: obtain a dataset of files
    dataset_flow = GenericDatasetFlow(**kwargs)
    json_files = dataset_flow.terminal_tasks().pop()
    self.update(dataset_flow)

    create_flow_metadata = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)
    read_json = LoadJSON()
    read_form = GetForm(form_id=form_id, base_url=base_url)
    extract_scores = ExtractScores(
        output_hdf5_key='/iguazu/features/typeform/subject',
    )
    propagate_metadata = PropagateMetadata(
        propagate_families=['omind', 'protocol'],
    )
    update_flow_metadata = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)

    with self:
        create_noresult = create_flow_metadata.map(parent=json_files)
        form = read_form()
        responses = read_json.map(file=json_files,
                                  upstream_tasks=[create_noresult])
        scores = extract_scores.map(parent=json_files,
                                    response=responses,
                                    form=unmapped(form))
        scores_with_metadata = propagate_metadata.map(parent=json_files,
                                                      child=scores)
        _ = update_flow_metadata.map(parent=json_files,
                                     child=scores_with_metadata)
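# The form=unmapped(form) argument above is the Prefect idiom for passing a
# single constant to every iteration of a mapped task. A minimal,
# self-contained sketch of the same pattern with toy tasks (all names below
# are illustrative, not part of this codebase):
from prefect import Flow, task, unmapped

@task
def _toy_score(response, form):
    # Each mapped child combines its own response with the shared form
    return f'{form}:{response}'

with Flow('unmapped-demo') as _demo_flow:
    _scores = _toy_score.map(response=[1, 2, 3], form=unmapped('form-A'))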
def click_options():
    return GenericDatasetFlow.click_options() + (
        click.option('--base-url', required=False, type=click.STRING,
                     default=DEFAULT_BASE_URL,
                     help='Base URL for the typeform API.'),
        click.option('--form-id', required=False, type=click.STRING,
                     help='ID of the form (questionnaire) on typeform.'),
    )
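# These option tuples are meant to be stacked onto a click command. A
# hedged sketch of how a CLI entry point might consume them; the command
# and flow class names below are illustrative, not actual iguazu entry
# points:
#
# import click
#
# def _attach_options(command, options):
#     for option in options:
#         command = option(command)
#     return command
#
# @click.command()
# def run_typeform(**kwargs):
#     flow = TypeformFlow(**kwargs)  # hypothetical flow class
#     flow.run()
#
# run_typeform = _attach_options(run_typeform, TypeformFlow.click_options())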
def _build(self, **kwargs):
    required_families = dict(
        iguazu=None,
        omind=None,
        protocol=None,
        standard=None,
    )
    families = kwargs.get('families', {}) or {}  # Could be None by default args
    for name in required_families:
        families.setdefault(name, required_families[name])
    kwargs['families'] = families

    # When the query is set by kwargs, leave the query and dialect as they
    # come. Otherwise, set to the default defined just above
    if not kwargs.get('query', None):
        kwargs['query'] = self.DEFAULT_QUERY
        kwargs['dialect'] = 'postgresql_json'

    # First part of this flow: obtain a dataset of files
    dataset_flow = GenericDatasetFlow(**kwargs)
    features_files = dataset_flow.terminal_tasks().pop()
    self.update(dataset_flow)

    read_features = LoadDataframe(
        key='/iguazu/features/typeform/subject',
    )
    merge_features = MergeDataframes(
        filename='typeform_summary.csv',
        path='datasets',
    )
    notify = SlackTask(message='Typeform feature summarization finished!')

    with self:
        feature_dataframes = read_features.map(file=features_files)
        merged_dataframe = merge_features(parents=features_files,
                                          dataframes=feature_dataframes)
        # Send slack notification
        notify(upstream_tasks=[merged_dataframe])
def _build(self, **kwargs):
    # Force required families: Quetzal workspace must have the following
    # families: (nb: None means "latest" version)
    required_families = dict(
        iguazu=None,
        omind=None,
        standard=None,
        protocol=None,
    )
    families = kwargs.get('families', {}) or {}  # Could be None by default args
    for name in required_families:
        families.setdefault(name, required_families[name])
    kwargs['families'] = families

    # When the query is set by kwargs, leave the query and dialect as they
    # come. Otherwise, set to the default defined just above
    if not kwargs.get('query', None):
        kwargs['query'] = self.DEFAULT_QUERY
        kwargs['dialect'] = 'postgresql_json'

    # The survey features flow requires an upstream dataset flow in order
    # to provide the input files. Create one and deduce the tasks to
    # plug the survey flow to the output of the dataset flow
    dataset_flow = GenericDatasetFlow(**kwargs)
    raw_signals = dataset_flow.terminal_tasks().pop()
    events = raw_signals
    self.update(dataset_flow)

    create_flow_metadata = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)

    # Instantiate tasks
    survey_report = ExtractReportFeatures(
        events_hdf5_key='/iguazu/events/standard',
        output_hdf5_key='/iguazu/features/survey_report',
        graceful_exceptions=(NoSurveyReport, SoftPreconditionFailed),
    )
    survey_meta = ExtractMetaFeatures(
        features_hdf5_key='/iguazu/features/survey_report',
        output_hdf5_key='/iguazu/features/survey_meta',
    )
    propagate_metadata = PropagateMetadata(propagate_families=['omind', 'protocol'])
    update_flow_metadata = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)
    report = Report()
    notify = SlackTask(preamble='Survey feature extraction finished\n'
                                'Task report:')

    with self:
        create_noresult = create_flow_metadata.map(parent=events)
        # Feature extraction
        features_reports = survey_report.map(events=events,
                                             upstream_tasks=[create_noresult])
        features_metas = survey_meta.map(features=features_reports,
                                         parent=raw_signals,
                                         upstream_tasks=[create_noresult])
        features_with_metadata = propagate_metadata.map(parent=raw_signals,
                                                        child=features_metas)
        update_noresult = update_flow_metadata.map(parent=raw_signals,
                                                   child=features_with_metadata)
        # Send slack notification
        message = report(files=features_with_metadata,
                         upstream_tasks=[update_noresult])
        notify(message=message)

    logger.debug('Built flow %s with tasks %s', self, self.tasks)
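# graceful_exceptions above lets a task degrade gracefully instead of
# failing its whole mapped branch. A minimal sketch of that pattern as a
# plain Prefect task; this is an illustration under assumed semantics, not
# iguazu's actual base-class implementation:
from prefect import Task

class _GracefulTask(Task):
    def __init__(self, graceful_exceptions=(), **kwargs):
        super().__init__(**kwargs)
        self.graceful_exceptions = tuple(graceful_exceptions)

    def run(self, **inputs):
        try:
            return self._run(**inputs)
        except self.graceful_exceptions as ex:
            # Log and return None so the rest of the branch can carry on
            self.logger.warning('Graceful failure in %s: %s', self.name, ex)
            return None

    def _run(self, **inputs):
        raise NotImplementedError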
def click_options():
    return GenericDatasetFlow.click_options()
def _build(self, **kwargs):
    required_families = dict(
        iguazu=None,
        omind=None,
        standard=None,
        protocol=None,
    )
    families = kwargs.get('families', {}) or {}  # Could be None by default args
    for name in required_families:
        families.setdefault(name, required_families[name])
    kwargs['families'] = families

    # When the query is set by kwargs, leave the query and dialect as they
    # come. Otherwise, set to the default defined just above
    if not kwargs.get('query', None):
        kwargs['query'] = self.DEFAULT_QUERY
        kwargs['dialect'] = 'postgresql_json'

    # First part of this flow: obtain a dataset of files
    dataset_flow = GenericDatasetFlow(**kwargs)
    raw_files = dataset_flow.terminal_tasks().pop()
    self.update(dataset_flow)

    create_flow_metadata = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)
    standardize_events = ExtractStandardEvents(
        name='UnityToStandardEvents',
        events_hdf5_key='/unity/events/unity_events',
        output_hdf5_key='/iguazu/events/standard',
    )
    # filter_vr = FilterVRSequences()
    standardize_ppg_signals = ExtractNexusSignal(
        name='NexusToStandardPPG',
        signals_hfd5_key='/nexus/signal/nexus_signal_raw',
        output_hdf5_key='/iguazu/signal/ppg/standard',
        source_column='G',
        target_column='PPG',
    )
    standardize_gsr_signals = ExtractNexusGSRSignal(
        name='NexusToStandardGSR',
        signals_hfd5_key='/nexus/signal/nexus_signal_raw',
        output_hdf5_key='/iguazu/signal/gsr/standard',
        source_column='F',
        target_column='GSR',
    )
    standardize_pzt_signals = ExtractNexusSignal(
        name='NexusToStandardPZT',
        signals_hfd5_key='/nexus/signal/nexus_signal_raw',
        output_hdf5_key='/iguazu/signal/pzt/standard',
        source_column='H',
        target_column='PZT',
    )
    merge = MergeHDF5(
        suffix='_standard',
        temporary=False,
        verify_status=True,
        hdf5_family='standard',
        meta_keys=['standard'],
        propagate_families=['omind', 'protocol'],
    )
    update_flow_metadata = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)
    report = Report()
    notify = SlackTask(
        preamble='Standardization of VR flow status finished.\n'
                 'Task report:')

    # Build flow
    with self:
        create_noresult = create_flow_metadata.map(parent=raw_files)
        standard_events = standardize_events.map(
            events=raw_files, upstream_tasks=[create_noresult])
        # vr_sequences = filter_vr.map(events=standard_events)
        standard_ppg = standardize_ppg_signals.map(
            signals=raw_files, upstream_tasks=[create_noresult])
        standard_gsr = standardize_gsr_signals.map(
            signals=raw_files, upstream_tasks=[create_noresult])
        standard_pzt = standardize_pzt_signals.map(
            signals=raw_files, upstream_tasks=[create_noresult])
        merged = merge.map(
            parent=raw_files,
            events=standard_events,
            PPG=standard_ppg,
            GSR=standard_gsr,
            PZT=standard_pzt,
        )
        update_noresult = update_flow_metadata.map(parent=raw_files,
                                                   child=merged)
        message = report(files=merged, upstream_tasks=[update_noresult])
        notify(message=message)

    logger.debug('Built flow %s with tasks %s', self, self.tasks)
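# MergeHDF5 above gathers the standardized series into a single HDF5 file
# under their respective keys. A minimal sketch of that merge with plain
# pandas (illustrative only: the real task also verifies task status and
# propagates Quetzal metadata families):
import pandas as pd

def _merge_hdf5_sketch(output_path, **named_frames):
    # Write each dataframe under an /iguazu/signal/<name>/standard key
    with pd.HDFStore(output_path, mode='w') as store:
        for name, frame in named_frames.items():
            store.put(f'/iguazu/signal/{name.lower()}/standard', frame)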
def _build(self, *, plot=False, **kwargs):
    # Force required families: Quetzal workspace must have the following
    # families: (nb: None means "latest" version)
    required_families = dict(
        iguazu=None,
        omind=None,
        standard=None,
        protocol=None,
    )
    families = kwargs.get('families', {}) or {}  # Could be None by default args
    for name in required_families:
        families.setdefault(name, required_families[name])
    kwargs['families'] = families

    # When the query is set by kwargs, leave the query and dialect as they
    # come. Otherwise, set to the default defined just above
    if not kwargs.get('query', None):
        kwargs['query'] = self.DEFAULT_QUERY
        kwargs['dialect'] = 'postgresql_json'

    # The cardiac features flow requires an upstream dataset flow in order
    # to provide the input files. Create one and deduce the tasks to
    # plug the cardiac flow to the output of the dataset flow
    dataset_flow = GenericDatasetFlow(**kwargs)
    raw_signals = dataset_flow.terminal_tasks().pop()
    events = raw_signals
    self.update(dataset_flow)

    create_flow_metadata = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)

    # Instantiate tasks
    clean = CleanPPGSignal(
        signals_hdf5_key='/iguazu/signal/ppg/standard',
        output_hdf5_key='/iguazu/signal/ppg/clean',
    )
    detect_peaks = SSFPeakDetect(
        signals_hdf5_key='/iguazu/signal/ppg/clean',
        ssf_output_hdf5_key='/iguazu/signal/ppg/ssf',
        nn_output_hdf5_key='/iguazu/signal/ppg/NN',
        nni_output_hdf5_key='/iguazu/signal/ppg/NNi',
    )
    extract_features = ExtractHRVFeatures(
        nn_hdf5_key='/iguazu/signal/ppg/NN',
        nni_hdf5_key='/iguazu/signal/ppg/NNi',
        output_hdf5_key='/iguazu/features/ppg/sequence',
    )
    propagate_metadata = PropagateMetadata(
        propagate_families=['omind', 'protocol'],
    )
    update_flow_metadata = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)
    report = Report()
    notify = SlackTask(preamble='Cardiac feature extraction finished.\n'
                                'Task report:')

    with self:
        create_noresult = create_flow_metadata.map(parent=raw_signals)
        # Signal processing branch
        clean_signals = clean.map(signals=raw_signals,
                                  upstream_tasks=[create_noresult])
        preprocessed_signals = detect_peaks.map(signals=clean_signals)
        # Feature extraction
        features = extract_features.map(nn=preprocessed_signals,
                                        nni=preprocessed_signals,
                                        events=events,
                                        parent=raw_signals)
        features_with_metadata = propagate_metadata.map(parent=raw_signals,
                                                        child=features)
        update_noresult = update_flow_metadata.map(
            parent=raw_signals, child=features_with_metadata)
        # Send slack notification
        message = report(files=features_with_metadata,
                         upstream_tasks=[update_noresult])
        notify(message=message)

    logger.debug('Built flow %s with tasks %s', self, self.tasks)
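# The upstream_tasks=[...] arguments above add pure state dependencies:
# ordering constraints without any data flow. A toy, self-contained sketch
# of the same mechanism (names below are illustrative):
from prefect import Flow, task

@task
def _prepare_metadata():
    pass  # stand-in for CreateFlowMetadata

@task
def _process(x):
    return x * 2

with Flow('state-dependency-demo') as _state_demo:
    _prep = _prepare_metadata()
    # _process waits for _prepare_metadata to succeed but receives no data
    _out = _process.map(x=[1, 2, 3], upstream_tasks=[_prep])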
def _build(self, **kwargs):
    # Force required families: Quetzal workspace must have the following
    # families: (nb: None means "latest" version)
    required_families = dict(
        iguazu=None,
        omind=None,
        standard=None,
        protocol=None,
    )
    families = kwargs.get('families', {}) or {}  # Could be None by default args
    for name in required_families:
        families.setdefault(name, required_families[name])
    kwargs['families'] = families

    # When the query is set by kwargs, leave the query and dialect as they
    # come. Otherwise, set to the default defined just above
    if not kwargs.get('query', None):
        kwargs['query'] = self.DEFAULT_QUERY
        kwargs['dialect'] = 'postgresql_json'

    # The galvanic features flow requires an upstream dataset flow in order
    # to provide the input files. Create one and deduce the tasks to
    # plug the galvanic flow to the output of the dataset flow
    dataset_flow = GenericDatasetFlow(**kwargs)
    raw_signals = dataset_flow.terminal_tasks().pop()
    events = raw_signals
    self.update(dataset_flow)

    create_flow_metadata = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)

    # Instantiate tasks
    clean = CleanGSRSignal(
        signals_hdf5_key='/iguazu/signal/gsr/standard',
        events_hdf5_key='/iguazu/events/standard',
        output_hdf5_key='/iguazu/signal/gsr/clean',
        graceful_exceptions=(GSRArtifactCorruption, SoftPreconditionFailed),
    )
    downsample = Downsample(
        signals_hdf5_key='/iguazu/signal/gsr/clean',
        output_hdf5_key='/iguazu/signal/gsr/downsampled',
    )
    cvx = ApplyCVX(
        signals_hdf5_key='/iguazu/signal/gsr/downsampled',
        output_hdf5_key='/iguazu/signal/gsr/cvx',
    )
    scrpeaks = DetectSCRPeaks(
        signals_hdf5_key='/iguazu/signal/gsr/cvx',
        output_hdf5_key='/iguazu/signal/gsr/scrpeaks',
    )
    extract_features = ExtractGSRFeatures(
        cvx_hdf5_key='/iguazu/signal/gsr/cvx',
        scrpeaks_hdf5_key='/iguazu/signal/gsr/scrpeaks',
        events_hdf5_key='/iguazu/events/standard',
        output_hdf5_key='/iguazu/features/gsr/sequence',
    )
    propagate_metadata = PropagateMetadata(
        propagate_families=['omind', 'protocol'],
    )
    update_flow_metadata = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)
    report = Report()
    notify = SlackTask(preamble='Galvanic feature extraction finished.\n'
                                'Task report:')

    with self:
        create_noresult = create_flow_metadata.map(parent=raw_signals)
        # Signal processing branch
        clean_signals = clean.map(signals=raw_signals,
                                  annotations=raw_signals,
                                  events=events,
                                  upstream_tasks=[create_noresult])
        downsample_signals = downsample.map(signals=clean_signals,
                                            annotations=clean_signals,
                                            upstream_tasks=[create_noresult])
        cvx_signals = cvx.map(signals=downsample_signals,
                              annotations=downsample_signals,
                              upstream_tasks=[create_noresult])
        scr_peaks = scrpeaks.map(signals=cvx_signals,
                                 annotations=cvx_signals,
                                 upstream_tasks=[create_noresult])
        # Feature extraction
        features = extract_features.map(cvx=cvx_signals,
                                        scrpeaks=scr_peaks,
                                        events=events,
                                        parent=raw_signals)
        features_with_metadata = propagate_metadata.map(parent=raw_signals,
                                                        child=features)
        update_noresult = update_flow_metadata.map(
            parent=raw_signals, child=features_with_metadata)
        # Send slack notification
        message = report(files=features_with_metadata,
                         upstream_tasks=[update_noresult])
        notify(message=message)

    logger.debug('Built flow %s with tasks %s', self, self.tasks)
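# A hedged usage sketch for any of the flows above: build the flow through
# its constructor, then run it like a regular Prefect flow. The class name
# and workspace value below are illustrative, and executor/registry
# configuration is omitted:
#
# flow = GalvanicFeaturesFlow(workspace_name='my-workspace')
# state = flow.run()
# assert state.is_successful()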