def test_study_config_fsl(self):
    """Run FSL's Bet on a template image and check a non-empty output.

    The test is skipped (with a warning on stderr) when FSL cannot be
    configured automatically or when no test image can be located.
    """
    # FSL command-line tools are not run on Windows platforms.
    if not sys.platform.startswith('win'):
        try:
            study_config = StudyConfig(use_fsl=True)
        except EnvironmentError as e:
            # If FSL cannot be configured automatically, skip the test
            print(
                'WARNING: Skip FSL test because it cannot be configured automatically:',
                str(e), file=sys.stderr)
            return
        # Preferred test image: the Debian/Ubuntu packaged MNI template.
        test_image = '/usr/share/data/fsl-mni152-templates/MNI152_T1_1mm_brain.nii.gz'
        if not osp.exists(test_image):
            # Fall back to data shipped inside the FSL installation itself.
            fsl_dir = os.environ.get('FSLDIR')
            test_image = None
            if not fsl_dir and study_config.fsl_config is not Undefined:
                # fsl_config points to a file three levels below the FSL
                # installation root; walk back up to it.
                fsl_dir = osp.dirname(
                    osp.dirname(osp.dirname(study_config.fsl_config)))
            if fsl_dir:
                # Look for nibabel's test data inside FSL's bundled
                # python environment (version directory is globbed).
                test_image = glob(
                    osp.join(
                        fsl_dir,
                        'fslpython/envs/fslpython/lib/python*/site-packages/nibabel/tests/data/anatomical.nii'
                    ))
                if test_image:
                    test_image = test_image[0]
            if not test_image:
                print(
                    'WARNING: Skip FSL test because test data cannot be found',
                    file=sys.stderr)
                return
        bet = study_config.get_process_instance(Bet)
        # Run brain extraction into a temporary file and check that the
        # output was actually written (non-zero size).
        with tempfile.NamedTemporaryFile(suffix='.nii.gz') as tmp:
            bet.run(input_image=test_image, output_image=tmp.name)
            self.assertTrue(os.stat(tmp.name).st_size != 0)
class TestRunProcess(unittest.TestCase):
    """ Execute a process. """

    def _run_dummy(self, use_cache):
        # Shared driver: build a study configuration over a throw-away
        # directory, execute the dummy process, then remove the directory.
        self.output_directory = tempfile.mkdtemp()
        self.study_config = StudyConfig(
            modules=["SmartCachingConfig"],
            use_smart_caching=use_cache,
            output_directory=self.output_directory)
        self.execution_dummy()
        shutil.rmtree(self.output_directory)

    def test_execution_with_cache(self):
        """ Execute a process with cache. """
        self._run_dummy(True)

    def test_execution_without_cache(self):
        """ Execute a process without cache. """
        self._run_dummy(False)

    def execution_dummy(self):
        """ Test to execute DummyProcess. """
        process = self.study_config.get_process_instance(
            DummyProcess, output_directory=self.output_directory)
        # The first parameter pair is repeated on purpose so the cached
        # execution path is exercised when caching is enabled.
        for f1, f2 in ((1., 2.3), (2., 2.), (1., 2.3)):
            self.study_config.run(process, execute_qc_nodes=False,
                                  verbose=1, f1=f1, f2=f2)
            self.assertEqual(process.res, f1 * f2)
            self.assertEqual(process.output_directory,
                             self.output_directory)
class CapsulEngine(Controller):
    '''
    A CapsulEngine is the mandatory entry point of all software using
    Capsul. It contains objects to store configuration and metadata,
    defines execution environment(s) (possibly remote) and performs
    pipelines execution.

    A CapsulEngine must be created using capsul.engine.capsul_engine
    function. For instance::

        from capsul.engine import capsul_engine
        ce = capsul_engine()

    Or::

        from capsul.api import capsul_engine
        ce = capsul_engine()

    By default, CapsulEngine only stores necessary configuration. But it
    may be necessary to modify the Python environment globally to apply
    this configuration. For instance, Nipype must be configured globally.
    If SPM is configured in CapsulEngine, it is necessary to explicitly
    activate the configuration in order to modify the global configuration
    of Nipype for SPM. This activation is done by explicitly activating
    the execution context of the capsul engine with the following code,
    inside a running process::

        from capsul.engine import capsul_engine, activate_configuration
        ce = capsul_engine()
        # Nipype is not configured here
        config = capsul_engine.settings.select_configurations(
            'global', {'nipype': 'any'})
        activate_configuration(config)
        # Nipype is configured here

    .. note::

        CapsulEngine is the replacement of the older
        :class:`~capsul.study_config.study_config.StudyConfig`, which is
        still present in Capsul 2.2 for backward compatibility, but will
        disappear in later versions. In Capsul 2.2 both objects exist, and
        are synchronized internally, which means that a StudyConfig object
        will also create a CapsulEngine, and the other way, and
        modifications in the StudyConfig object will change the
        corresponding item in CapsulEngine and vice versa. Functionalities
        of StudyConfig are moving internally to CapsulEngine, StudyConfig
        being merely a wrapper.

    **Using CapsulEngine**

    It is used to store configuration variables, and to handle execution
    within the configured context.

    The configuration has 2 independent axes: configuration modules, which
    provide additional configuration variables, and "environments" which
    typically represent computing resources.

    *Computing resources*

    Capsul is using :somaworkflow:`Soma-Workflow <index.html>` to run
    processes, and is thus able to connect and execute on a remote
    computing server. The remote computing resource may have a different
    configuration from the client one (paths for software or data,
    available external software etc). So configurations specific to
    different computing resources should be handled in CapsulEngine. For
    this, the configuration section is split into several configuration
    entries, one for each computing resource.

    As this is a little bit complex to handle at first, a "global"
    configuration (what we call "environment") is used to maintain all
    common configuration options. It is typically used to work on the
    local machine, especially for users who only work locally.

    Configuration is stored in a database (either internal or persistent),
    through the :class:`~capsul.engine.settings.Settings` object found in
    ``CapsulEngine.settings``. Access and modification of settings should
    occur within a session block using
    ``with capsul_engine.settings as session``. See the
    :class:`~capsul.engine.settings.Settings` class for details.

    ::

        >>> from capsul.api import capsul_engine
        >>> ce = capsul_engine()
        >>> config = ce.settings.select_configurations('global')
        >>> config = ce.global_config
        >>> print(config)
        {'capsul_engine': {'uses': {'capsul.engine.module.fsl': 'ALL', 'capsul.engine.module.matlab': 'ALL', 'capsul.engine.module.spm': 'ALL'}}}

    Whenever a new computing resource is used, it can be added as a new
    environment key to all configuration operations.

    Note that the settings store all possible configurations for all
    environments (or computing resources), but are not "activated": this
    is only done at runtime in specific process execution functions: each
    process may need to select and use a different configuration from
    other ones, and activate it individually.

    :class:`~capsul.process.process.Process` subclasses or instances may
    provide their configuration requirements via their
    :meth:`~capsul.process.process.Process.requirements` method. This
    method returns a dictionary of request strings (one element per needed
    module) that will be used to select one configuration amongst the
    available settings entries of each required module.

    *configuration modules*

    The configuration is handled through a set of configuration modules.
    Each is dedicated for a topic (for instance handling a specific
    external software paths, or managing process parameters completion,
    etc). A module adds a settings table in the database, with its own
    variables, and is able to manage runtime configuration of programs,
    if needed, through its ``activate_configurations`` function. Capsul
    comes with a set of predefined modules:
    :class:`~capsul.engine.module.attributes`,
    :class:`~capsul.engine.module.axon`,
    :class:`~capsul.engine.module.fom`,
    :class:`~capsul.engine.module.fsl`,
    :class:`~capsul.engine.module.matlab`,
    :class:`~capsul.engine.module.spm`

    **Methods**
    '''

    def __init__(self, database_location, database, require):
        '''
        CapsulEngine.__init__(self, database_location, database,
                              config=None)

        The CapsulEngine constructor should not be called directly.
        Use :func:`capsul_engine` factory function instead.
        '''
        super(CapsulEngine, self).__init__()
        self._settings = None
        self._database_location = database_location
        self._database = database
        self._loaded_modules = set()
        self.load_modules(require)
        # Local import to avoid a circular import between the engine and
        # the (legacy) StudyConfig wrapper.
        from capsul.study_config.study_config import StudyConfig
        self.study_config = StudyConfig(engine=self)
        self._metadata_engine = from_json(
            database.json_value('metadata_engine'))
        self._connected_resource = ''

    @property
    def settings(self):
        # Lazily instantiated so the settings database is only opened on
        # first access.
        if self._settings is None:
            self._settings = Settings(self.database.db)
        return self._settings

    @property
    def database(self):
        return self._database

    @property
    def database_location(self):
        return self._database_location

    @property
    def metadata_engine(self):
        return self._metadata_engine

    @metadata_engine.setter
    def metadata_engine(self, metadata_engine):
        # Persist the new metadata engine in the database as JSON.
        self._metadata_engine = metadata_engine
        self.database.set_json_value('metadata_engine',
                                     to_json(self._metadata_engine))

    def load_modules(self, require):
        '''
        Call self.load_module for each required module. The list of
        modules to load is located in self.modules (if it is None,
        capsul.module.default_modules is used).
        '''
        if require is None:
            require = default_modules
        for module in require:
            self.load_module(module)

    def load_module(self, module_name):
        '''
        Load a module if it has not already been loaded (in this case,
        nothing is done).

        A module is a fully qualified name of a Python module (as accepted
        by Python import statement). Such a module must define the two
        following functions (and may define two others, see below):

        def load_module(capsul_engine, module_name):
        def set_environ(config, environ):

        load_module of each module is called once before reading and
        applying the configuration. It can be used to add traits to the
        CapsulEngine in order to define the configuration options that
        are used by the module. Values of these traits are automatically
        stored in configuration in database when self.save() is used, and
        they are retrieved from database before initializing modules.

        set_environ is called in the context of the processing (i.e. on
        the, possibly remote, machine that runs the pipelines). It
        receives the configuration as a JSON compatible dictionary (for
        instance a CapsulEngine attribute
        `capsul_engine.spm.directory` would be
        config['spm']['directory']). The function must modify the environ
        dictionary to set the environment variables that must be defined
        for pipeline configuration. These variables are typically used by
        modules in capsul.in_context module to manage running external
        software with appropriate configuration.

        Returns True when the module was actually loaded, False when it
        was already loaded.
        '''
        module_name = self.settings.module_name(module_name)
        if module_name not in self._loaded_modules:
            self._loaded_modules.add(module_name)
            python_module = importlib.import_module(module_name)
            # init_settings is optional: a module without it simply has no
            # settings table to declare.
            init_settings = getattr(python_module, 'init_settings', None)
            if init_settings is not None:
                init_settings(self)
            return True
        return False

    #
    # Methods imported from self.database
    #
    # TODO: take computing resource in account in the following methods

    def set_named_directory(self, name, path):
        return self.database.set_named_directory(name, path)

    def named_directory(self, name):
        return self.database.named_directory(name)

    def named_directories(self):
        # Fixed: previously delegated to the non-existent setter
        # self.database.set_named_directories(); delegate to the getter.
        return self.database.named_directories()

    def set_json_value(self, name, json_value):
        return self.database.set_json_value(name, json_value)

    def json_value(self, name):
        return self.database.json_value(name)

    def set_path_metadata(self, path, metadata, named_directory=None):
        return self.database.set_path_metadata(path, metadata,
                                               named_directory)

    def path_metadata(self, path, named_directory=None):
        # Fixed: this getter previously delegated to the setter
        # (set_path_metadata) with a wrong argument count.
        return self.database.path_metadata(path, named_directory)

    def import_configs(self, environment, config_dict):
        '''
        Import config values from a dictionary as given by
        :meth:`Settings.select_configurations`.

        Compared to :meth:`Settings.import_configs` this method (at
        :class:`CapsulEngine` level) also loads the required modules.
        '''
        modules = config_dict.get('capsul_engine', {}).get('uses', {})
        for module in modules:
            self.load_module(module)
        self.settings.import_configs(environment, config_dict)

    #
    # Processes and pipelines related methods
    #
    def get_process_instance(self, process_or_id, **kwargs):
        '''
        The only official way to get a process instance is to use this
        method.
        For now, it simply calls self.study_config.get_process_instance
        but it will change in the future.
        '''
        instance = self.study_config.get_process_instance(
            process_or_id, **kwargs)
        return instance

    def get_iteration_pipeline(self, pipeline_name, node_name,
                               process_or_id, iterative_plugs=None,
                               do_not_export=None, make_optional=None,
                               **kwargs):
        """ Create a pipeline with an iteration node iterating the given
        process.

        Parameters
        ----------
        pipeline_name: str
            pipeline name
        node_name: str
            iteration node name in the pipeline
        process_or_id: process description
            as in :meth:`get_process_instance`
        iterative_plugs: list (optional)
            passed to :meth:`Pipeline.add_iterative_process`
        do_not_export: list
            passed to :meth:`Pipeline.add_iterative_process`
        make_optional: list
            passed to :meth:`Pipeline.add_iterative_process`

        Returns
        -------
        pipeline: :class:`Pipeline` instance
        """
        from capsul.pipeline.pipeline import Pipeline

        pipeline = Pipeline()
        pipeline.name = pipeline_name
        pipeline.set_study_config(get_ref(self.study_config))
        # Fixed: make_optional was accepted but never forwarded; it is now
        # passed by keyword as documented above.
        pipeline.add_iterative_process(node_name, process_or_id,
                                       iterative_plugs, do_not_export,
                                       make_optional=make_optional,
                                       **kwargs)
        pipeline.autoexport_nodes_parameters(include_optional=True)
        return pipeline

    def start(self, process, workflow=None, history=True,
              get_pipeline=False, **kwargs):
        '''
        Asynchronously start the execution of a process or pipeline in the
        connected computing environment. Returns an identifier of the
        process execution and can be used to get the status of the
        execution or wait for its termination.

        TODO:
        if history is True, an entry of the process execution is stored in
        the database. The content of this entry is to be defined but it
        will contain the process parameters (to restart the process) and
        will be updated on process termination (for instance to store
        execution time if possible).

        Parameters
        ----------
        process: Process or Pipeline instance
        workflow: Workflow instance (optional - if already defined before
            call)
        history: bool (optional)
            TODO: not implemented yet.
        get_pipeline: bool (optional)
            if True, start() will return a tuple (execution_id, pipeline).
            The pipeline is normally the input pipeline (process) if it is
            actually a pipeline. But if the input process is a "single
            process", it will be inserted into a small pipeline for
            execution. This pipeline will be the one actually run, and
            may be passed to :meth:`wait` to set output parameters.

        Returns
        -------
        execution_id: int
            execution identifier (actually a soma-workflow id)
        pipeline: Pipeline instance (optional)
            only returned if get_pipeline is True.
        '''
        return run.start(self, process, workflow, history, get_pipeline,
                         **kwargs)

    def connect(self, computing_resource):
        '''
        Connect the capsul engine to a computing resource
        '''
        self._connected_resource = computing_resource

    def connected_to(self):
        '''
        Return the name of the computing resource this capsul engine is
        connected to or None if it is not connected.
        '''
        # NOTE(review): before any connect() this returns '' (set in
        # __init__), not None as documented — confirm callers' expectation
        # before normalizing.
        return self._connected_resource

    def disconnect(self):
        '''
        Disconnect from a computing resource.
        '''
        self._connected_resource = None

    def executions(self):
        '''
        List the execution identifiers of all processes that have been
        started but not disposed in the connected computing resource.
        Raises an exception if the computing resource is not connected.
        '''
        raise NotImplementedError()

    def dispose(self, execution_id, conditional=False):
        '''
        Update the database with the current state of a process execution
        and free the resources used in the computing resource (i.e. remove
        the workflow from SomaWorkflow).

        If ``conditional`` is set to True, then dispose is only done if
        the configuration does not specify to keep succeeded / failed
        workflows.
        '''
        run.dispose(self, execution_id, conditional=conditional)

    def interrupt(self, execution_id):
        '''
        Try to stop the execution of a process. Does not wait for the
        process to be terminated.
        '''
        return run.interrupt(self, execution_id)

    def wait(self, execution_id, timeout=-1, pipeline=None):
        '''
        Wait for the end of a process execution (either normal
        termination, interruption or error).
        '''
        return run.wait(self, execution_id, timeout=timeout,
                        pipeline=pipeline)

    def status(self, execution_id):
        '''
        Return a simple value with the status of an execution (queued,
        running, terminated, error, etc.)
        '''
        return run.status(self, execution_id)

    def detailed_information(self, execution_id):
        '''
        Return complete (and possibly big) information about a process
        execution.
        '''
        return run.detailed_information(self, execution_id)

    def call(self, process, history=True, **kwargs):
        # Fixed: the signature previously declared ``*kwargs`` (a tuple)
        # while the body expanded it with ``**kwargs``, which raised a
        # TypeError on every call.
        return run.call(self, process, history=history, **kwargs)

    def check_call(self, process, history=True, **kwargs):
        return run.check_call(self, process, history=history, **kwargs)

    def raise_for_status(self, status, execution_id=None):
        '''
        Raise an exception if a process execution failed
        '''
        run.raise_for_status(self, status, execution_id)
class CapsulEngine(Controller):
    '''
    A CapsulEngine is the mandatory entry point of all software using
    Capsul. It contains objects to store configuration and metadata,
    define execution environment (possibly remote) and perform pipelines
    execution.

    A CapsulEngine must be created using capsul.engine.capsul_engine
    function. For instance :

    from capsul.engine import capsul_engine
    ce = capsul_engine()

    By default, CapsulEngine only stores necessary configuration. But it
    may be necessary to modify the Python environment globally to apply
    this configuration. For instance, Nipype must be configured globally.
    If SPM is configured in CapsulEngine, it is necessary to explicitly
    activate the configuration in order to modify the global configuration
    of Nipype for SPM. This activation is done by explicitly activating
    the execution context of the capsul engine with the following code :

    from capsul.engine import capsul_engine
    ce = capsul_engine()
    # Nipype is not configured here
    with ce.execution_context():
        # Nipype is configured here
    # Nipype may not be configured here
    '''

    # Modules loaded when the database does not specify any.
    default_modules = ['capsul.engine.module.spm',
                       'capsul.engine.module.fsl']

    def __init__(self, database_location, database, config=None):
        '''
        CapsulEngine constructor should not be called directly.
        Use capsul_engine() factory function instead.
        '''
        super(CapsulEngine, self).__init__()

        self._database_location = database_location
        self._database = database

        db_config = database.json_value('config')

        self._loaded_modules = {}
        self.modules = database.json_value('modules')
        if self.modules is None:
            self.modules = self.default_modules
        self.load_modules()

        execution_context = from_json(
            database.json_value('execution_context'))
        if execution_context is None:
            execution_context = ExecutionContext()
        self._execution_context = execution_context

        self._processing_engine = from_json(
            database.json_value('processing_engine'))
        self._metadata_engine = from_json(
            database.json_value('metadata_engine'))

        # Apply configuration stored in the database first, then the
        # explicit config argument (which therefore takes precedence).
        for cfg in (db_config, config):
            if cfg:
                for n, v in cfg.items():
                    if isinstance(v, dict):
                        o = getattr(self, n)
                        if isinstance(o, Controller):
                            o.import_from_dict(v)
                            continue
                    setattr(self, n, v)
        self.init_modules()

        self.study_config = StudyConfig(engine=self)

    @property
    def database(self):
        return self._database

    @property
    def database_location(self):
        return self._database_location

    @property
    def execution_context(self):
        return self._execution_context

    @execution_context.setter
    def execution_context(self, execution_context):
        self._execution_context = execution_context

    @property
    def processing_engine(self):
        return self._processing_engine

    @property
    def metadata_engine(self):
        return self._metadata_engine

    @metadata_engine.setter
    def metadata_engine(self, metadata_engine):
        # Persist the new metadata engine in the database as JSON.
        self._metadata_engine = metadata_engine
        self.database.set_json_value('metadata_engine',
                                     to_json(self._metadata_engine))

    def load_modules(self):
        '''
        Call self.load_module for each required module. The list of
        modules to load is located in self.modules (if it is None,
        self.default_modules is used).
        '''
        if self.modules is None:
            modules = self.default_modules
        else:
            modules = self.modules
        for module in modules:
            self.load_module(module)

    def load_module(self, module):
        '''
        Load a module if it has not already been loaded (in this case,
        nothing is done).

        A module is a fully qualified name of a Python module (as accepted
        by Python import statement). Such a module must define the two
        following functions (and may define two others, see below):

        def load_module(capsul_engine, module_name):
        def init_module(capsul_engine, module_name, loaded_module):

        load_module of each module is called once before reading and
        applying the configuration. It can be used to add traits to the
        CapsulEngine in order to define the configuration options that
        are used by the module. Values of these traits are automatically
        stored in configuration in database when self.save() is used, and
        they are retrieved from database before initializing modules.

        init_module of each module is called once after the reading of
        configuration and the setting of capsul engine attributes defined
        in traits.

        A module may define the following functions:

        def enter_execution_context(execution_context)
        def exit_execution_context(execution_context)

        enter_execution_context (resp. exit_execution_context) is called
        each time the capsul engine's execution context is activated
        (resp. deactivated).

        Returns True when the module was actually loaded, False when it
        was already loaded.
        '''
        if module not in self._loaded_modules:
            __import__(module)
            python_module = sys.modules.get(module)
            if python_module is None:
                raise ValueError('Cannot find %s in Python modules'
                                 % module)
            loader = getattr(python_module, 'load_module', None)
            if loader is None:
                raise ValueError('No function load_module() defined in %s'
                                 % module)
            self._loaded_modules[module] = loader(self, module)
            return True
        return False

    def init_modules(self):
        '''
        Call self.init_module for each required module. The list of
        modules to initialize is located in self.modules (if it is None,
        self.default_modules is used).
        '''
        if self.modules is None:
            modules = self.default_modules
        else:
            modules = self.modules
        for module in modules:
            self.init_module(module)

    def init_module(self, module):
        '''
        Initialize a module by calling its init_module function.
        '''
        python_module = sys.modules.get(module)
        if python_module is None:
            raise ValueError('Cannot find %s in Python modules' % module)
        initializer = getattr(python_module, 'init_module', None)
        if initializer is None:
            raise ValueError('No function init_module() defined in %s'
                             % module)
        initializer(self, module, self._loaded_modules[module])

    def save(self):
        '''
        Save the full status of the CapsulEngine in the database. The
        following items are set in the database:

        'execution_context': a JSON serialization of
            self.execution_context
        'processing_engine': a JSON serialization of
            self.processing_engine
        'metadata_engine': a JSON serialization of self.metadata_engine
        'config': a dictionary containing configuration. This dictionary
            is obtained using traits defined on capsul engine (ignoring
            values that are undefined).
        '''
        self.database.set_json_value('execution_context',
                                     to_json(self._execution_context))
        if self._processing_engine:
            self.database.set_json_value('processing_engine',
                                         to_json(self._processing_engine))
        if self._metadata_engine:
            self.database.set_json_value('metadata_engine',
                                         to_json(self._metadata_engine))
        config = {}
        for n in self.user_traits().keys():
            v = getattr(self, n)
            if v is Undefined:
                continue
            if isinstance(v, Controller):
                v = v.export_to_dict(exclude_undefined=True)
                # Skip empty sub-controllers entirely.
                if not v:
                    continue
            config[n] = v
        self.database.set_json_value('config', config)
        self.database.commit()

    #
    # Methods imported from self.database
    #

    def set_named_directory(self, name, path):
        return self.database.set_named_directory(name, path)

    def named_directory(self, name):
        return self.database.named_directory(name)

    def named_directories(self):
        # Fixed: previously delegated to the non-existent setter
        # self.database.set_named_directories(); delegate to the getter.
        return self.database.named_directories()

    def set_json_value(self, name, json_value):
        return self.database.set_json_value(name, json_value)

    def json_value(self, name):
        return self.database.json_value(name)

    def set_path_metadata(self, path, metadata, named_directory=None):
        # Fixed: a stray undefined ``name`` argument was passed first,
        # raising NameError on every call.
        return self.database.set_path_metadata(path, metadata,
                                               named_directory)

    def path_metadata(self, path, named_directory=None):
        # Fixed: this getter previously called the setter
        # (set_path_metadata) and passed an undefined ``name``.
        return self.database.path_metadata(path, named_directory)

    #
    # Processes and pipelines related methods
    #
    def get_process_instance(self, process_or_id, **kwargs):
        '''
        The only official way to get a process instance is to use this
        method.
        For now, it simply calls self.study_config.get_process_instance
        but it will change in the future.
        '''
        instance = self.study_config.get_process_instance(process_or_id,
                                                          **kwargs)
        return instance

    def start(self, process, history=True):
        '''
        Asynchronously start the execution of a process in the environment
        defined by self.processing_engine. Returns a string that is an
        uuid of the process execution and can be used to get the status of
        the execution or wait for its termination.

        if history is True, an entry of the process execution is stored in
        the database. The content of this entry is to be defined but it
        will contain the process parameters (to restart the process) and
        will be updated on process termination (for instance to store
        execution time if possible).
        '''
        raise NotImplementedError()

    def executions(self):
        raise NotImplementedError()

    def interrupt(self, execution_id):
        '''
        Try to stop the execution of a process. Does not wait for the
        process to be terminated.
        '''
        raise NotImplementedError()

    def wait(self, execution_id):
        '''
        Wait for the end of a process execution (either normal
        termination, interruption or error).
        '''
        raise NotImplementedError()

    def status(self, execution_id):
        '''
        Return information about a process execution. The content of this
        information is still to be defined.
        '''
        raise NotImplementedError()

    def detailed_information(self, execution_id):
        raise NotImplementedError()

    def call(self, process, history=True):
        # Start the process and block until it terminates.
        eid = self.start(process, history)
        return self.wait(eid)

    def check_call(self, process, history=True):
        # Like call(), but raises if the execution did not succeed.
        eid = self.start(process, history)
        status = self.wait(eid)
        self.raise_for_status(status, eid)

    def raise_for_status(self, status, execution_id=None):
        raise NotImplementedError()
class CapsulEngine(Controller):
    '''
    Entry point of Capsul: stores configuration (in a database), loads and
    initializes configuration modules, and gives access to process
    instances through the wrapped StudyConfig.
    '''

    # Modules loaded when the database does not specify any.
    default_modules = ['capsul.engine.module.spm',
                       'capsul.engine.module.fsl']

    def __init__(self, database_location, database, config=None):
        '''
        CapsulEngine constructor should not be called directly.
        Use engine() factory function instead.
        '''
        super(CapsulEngine, self).__init__()

        self._database_location = database_location
        self._database = database

        self.study_config = StudyConfig()

        db_config = database.json_value('config')
        self.modules = database.json_value('modules')
        if self.modules is None:
            self.modules = self.default_modules
        self.load_modules()

        execution_context = from_json(
            database.json_value('execution_context'))
        if execution_context is None:
            execution_context = ExecutionContext()
        self._execution_context = execution_context

        self._processing_engine = from_json(
            database.json_value('processing_engine'))
        self._metadata_engine = from_json(
            database.json_value('metadata_engine'))

        # Apply configuration stored in the database first, then the
        # explicit config argument (which therefore takes precedence).
        for cfg in (db_config, config):
            if cfg:
                for n, v in cfg.items():
                    if isinstance(v, dict):
                        o = getattr(self, n)
                        if isinstance(o, Controller):
                            o.import_from_dict(v)
                            continue
                    setattr(self, n, v)
        self.init_modules()

    @property
    def database(self):
        return self._database

    @property
    def database_location(self):
        return self._database_location

    @property
    def execution_context(self):
        return self._execution_context

    @execution_context.setter
    def execution_context(self, execution_context):
        self._execution_context = execution_context

    @property
    def processing_engine(self):
        return self._processing_engine

    @property
    def metadata_engine(self):
        return self._metadata_engine

    @metadata_engine.setter
    def metadata_engine(self, metadata_engine):
        # Persist the new metadata engine in the database as JSON.
        self._metadata_engine = metadata_engine
        self.database.set_json_value('metadata_engine',
                                     to_json(self._metadata_engine))

    def load_modules(self):
        '''
        Call self.load_module for each required module (self.modules, or
        self.default_modules when self.modules is None).
        '''
        if self.modules is None:
            modules = self.default_modules
        else:
            modules = self.modules
        self._loaded_modules = {}
        for module in modules:
            self.load_module(module)

    def load_module(self, module):
        '''
        Import a configuration module and call its load_module() function.
        Returns True when the module was actually loaded, False when it
        was already loaded.
        '''
        if module not in self._loaded_modules:
            __import__(module)
            python_module = sys.modules.get(module)
            if python_module is None:
                raise ValueError('Cannot find %s in Python modules'
                                 % module)
            loader = getattr(python_module, 'load_module', None)
            if loader is None:
                raise ValueError('No function load_module() defined in %s'
                                 % module)
            self._loaded_modules[module] = loader(self, module)
            return True
        return False

    def init_modules(self):
        '''
        Call self.init_module for each required module (self.modules, or
        self.default_modules when self.modules is None).
        '''
        if self.modules is None:
            modules = self.default_modules
        else:
            modules = self.modules
        for module in modules:
            self.init_module(module)

    def init_module(self, module):
        '''
        Initialize a module by calling its init_module() function.
        '''
        python_module = sys.modules.get(module)
        if python_module is None:
            raise ValueError('Cannot find %s in Python modules' % module)
        initializer = getattr(python_module, 'init_module', None)
        if initializer is None:
            raise ValueError('No function init_module() defined in %s'
                             % module)
        initializer(self, module, self._loaded_modules[module])

    def save(self):
        '''
        Save the full status of the CapsulEngine (execution context,
        processing engine, metadata engine and trait-based configuration)
        in the database, then commit.
        '''
        self.database.set_json_value('execution_context',
                                     to_json(self._execution_context))
        if self._processing_engine:
            self.database.set_json_value('processing_engine',
                                         to_json(self._processing_engine))
        if self._metadata_engine:
            self.database.set_json_value('metadata_engine',
                                         to_json(self._metadata_engine))
        config = {}
        for n in self.user_traits().keys():
            v = getattr(self, n)
            if v is Undefined:
                continue
            if isinstance(v, Controller):
                v = v.export_to_dict(exclude_undefined=True)
                # Skip empty sub-controllers entirely.
                if not v:
                    continue
            config[n] = v
        self.database.set_json_value('config', config)
        self.database.commit()

    #
    # Methods imported from self.database
    #

    def set_named_directory(self, name, path):
        return self.database.set_named_directory(name, path)

    def named_directory(self, name):
        return self.database.named_directory(name)

    def named_directories(self):
        # Fixed: previously delegated to the non-existent setter
        # self.database.set_named_directories(); delegate to the getter.
        return self.database.named_directories()

    def set_json_value(self, name, json_value):
        return self.database.set_json_value(name, json_value)

    def json_value(self, name):
        return self.database.json_value(name)

    def set_path_metadata(self, path, metadata, named_directory=None):
        # Fixed: a stray undefined ``name`` argument was passed first,
        # raising NameError on every call.
        return self.database.set_path_metadata(path, metadata,
                                               named_directory)

    def path_metadata(self, path, named_directory=None):
        # Fixed: this getter previously called the setter
        # (set_path_metadata) and passed an undefined ``name``.
        return self.database.path_metadata(path, named_directory)

    def get_process_instance(self, process_or_id, **kwargs):
        '''
        The supported way to get a process instance is to use this method.
        For now, it simply calls self.study_config.get_process_instance
        but it will change in the future.
        '''
        instance = self.study_config.get_process_instance(process_or_id,
                                                          **kwargs)
        return instance