Example #1
def setup_logger(logger_name, file_path, level, to_screen=False):
    '''
    Function to initialize and configure a logger that can write to file
    and (optionally) the screen.

    Parameters
    ----------
    logger_name : str
        name of the logger
    file_path : str
        file path to the log file on disk
    level : int
        level at which the logger should log; use the integer constants
        defined by the Python logging package
        (e.g. logging.INFO == 20, logging.DEBUG == 10)
    to_screen : bool, optional
        whether to also enable logging to the screen

    Returns
    -------
    logger : logging.Logger
        Python logging.Logger object capable of logging run-time
        information about the program to file and/or screen
    '''

    # Import packages
    import logging

    # Init logger, formatter, filehandler, streamhandler
    logger = logging.getLogger(logger_name)
    logger.setLevel(level)
    formatter = logging.Formatter('%(asctime)s : %(message)s')

    # Write logs to file
    fileHandler = logging.FileHandler(file_path)
    fileHandler.setFormatter(formatter)
    logger.addHandler(fileHandler)

    # Write to screen, if desired
    if to_screen:
        streamHandler = logging.StreamHandler()
        streamHandler.setFormatter(formatter)
        logger.addHandler(streamHandler)

    # Return the logger
    return logger
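
A minimal usage sketch for the function above (the logger name and file
path here are hypothetical):

# Hypothetical usage; assumes setup_logger is in scope
import logging

logger = setup_logger('my_pipeline', '/tmp/my_pipeline.log',
                      logging.INFO, to_screen=True)
logger.info('Pipeline started')            # goes to file and screen
logger.debug('Suppressed: DEBUG < INFO')   # below the configured level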
Example #2
def build_collect_workflow(args, retval):
    import os
    import glob
    import warnings
    warnings.filterwarnings("ignore")
    import ast
    import pkg_resources
    from pathlib import Path
    import yaml
    import uuid
    from time import strftime
    import shutil

    try:
        import pynets

        print(f"\n\nPyNets Version:\n{pynets.__version__}\n\n")
    except ImportError:
        print("PyNets not installed! Ensure that you are using the correct"
              " python version.")

    # Unpack command-line arguments into local variables
    resources = args.pm
    if resources == "auto":
        from multiprocessing import cpu_count
        import psutil
        nthreads = cpu_count() - 1
        procmem = [
            int(nthreads),
            # Named attribute access is clearer than a positional index
            int(psutil.virtual_memory().free / 1000000000)
        ]
    else:
        # literal_eval is safer than eval for parsing "[n_procs, memory_gb]"
        procmem = list(ast.literal_eval(str(resources)))
    plugin_type = args.plug
    if isinstance(plugin_type, list):
        plugin_type = plugin_type[0]
    verbose = args.v
    working_path = args.basedir
    work_dir = args.work
    modality = args.modality
    drop_cols = args.dc
    if isinstance(modality, list):
        modality = modality[0]

    if os.path.isdir(work_dir):
        shutil.rmtree(work_dir)

    os.makedirs(f"{str(Path(working_path))}/{modality}_group_topology_auc",
                exist_ok=True)

    wf = collect_all(working_path, modality, drop_cols)

    # Initialize before the try block so later references cannot hit a
    # NameError if parsing fails
    runtime_dict = {}
    execution_dict = {}
    with open(pkg_resources.resource_filename("pynets", "runconfig.yaml"),
              "r") as stream:
        try:
            hardcoded_params = yaml.safe_load(stream)
            for i in range(len(hardcoded_params["resource_dict"])):
                runtime_dict[list(hardcoded_params["resource_dict"][i].keys(
                ))[0]] = ast.literal_eval(
                    list(hardcoded_params["resource_dict"][i].values())[0][0])
            for i in range(len(hardcoded_params["execution_dict"])):
                execution_dict[list(
                    hardcoded_params["execution_dict"][i].keys())[0]] = list(
                        hardcoded_params["execution_dict"][i].values())[0][0]
        except yaml.YAMLError:
            # The file is already open here, so a parse error, not
            # FileNotFoundError, is what can occur
            print("Failed to parse runconfig.yaml")
    run_uuid = f"{strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4()}"
    os.makedirs(f"{work_dir}/pynets_out_collection{run_uuid}", exist_ok=True)
    wf.base_dir = f"{work_dir}/pynets_out_collection{run_uuid}"

    if verbose is True:
        from nipype import config, logging

        cfg_v = dict(
            logging={
                "workflow_level": "DEBUG",
                "utils_level": "DEBUG",
                "interface_level": "DEBUG",
                "filemanip_level": "DEBUG",
                "log_directory": str(wf.base_dir),
                "log_to_file": True,
            },
            monitoring={
                "enabled": True,
                "sample_frequency": "0.1",
                "summary_append": True,
                "summary_file": str(wf.base_dir),
            },
        )
        # Apply the config first, then refresh nipype's loggers from it
        config.update_config(cfg_v)
        logging.update_logging(config)
        config.enable_debug_mode()
        config.enable_resource_monitor()

        # Re-import the stdlib logging module (shadows nipype's logging,
        # which is no longer needed in this branch)
        import logging

        callback_log_path = f"{wf.base_dir}/run_stats.log"
        logger = logging.getLogger("callback")
        logger.setLevel(logging.DEBUG)
        handler = logging.FileHandler(callback_log_path)
        logger.addHandler(handler)

    execution_dict["crashdump_dir"] = str(wf.base_dir)
    execution_dict["plugin"] = str(plugin_type)
    cfg = dict(execution=execution_dict)
    for key, settings in cfg.items():
        for setting, value in settings.items():
            wf.config[key][setting] = value
    try:
        wf.write_graph(graph2use="colored", format="png")
    except Exception:
        # Graph rendering needs graphviz; skip silently if unavailable
        pass
    if verbose is True:
        from nipype.utils.profiler import log_nodes_cb

        plugin_args = {
            "n_procs": int(procmem[0]),
            "memory_gb": int(procmem[1]),
            "status_callback": log_nodes_cb,
            "scheduler": "mem_thread",
        }
    else:
        plugin_args = {
            "n_procs": int(procmem[0]),
            "memory_gb": int(procmem[1]),
            "scheduler": "mem_thread",
        }
    print("%s%s%s" % ("\nRunning with ", str(plugin_args), "\n"))
    wf.run(plugin=plugin_type, plugin_args=plugin_args)
    if verbose is True:
        from nipype.utils.draw_gantt_chart import generate_gantt_chart

        print("Plotting resource profile from run...")
        generate_gantt_chart(callback_log_path, cores=int(procmem[0]))
        handler.close()
        logger.removeHandler(handler)
    return
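
As a reference for the parsing loop in this example, a sketch of the
runconfig.yaml shape it assumes: each entry of resource_dict /
execution_dict is a single-key mapping whose value is a one-element list
(the keys and values below are hypothetical):

# Hypothetical runconfig.yaml shape assumed by the parsing loop above
import ast
import yaml

example_yaml = """
resource_dict:
  - ExtractNetStats:
      - '(1, 4)'
execution_dict:
  - crashfile_format:
      - 'txt'
"""
params = yaml.safe_load(example_yaml)
runtime_dict = {
    list(entry.keys())[0]: ast.literal_eval(list(entry.values())[0][0])
    for entry in params["resource_dict"]
}
print(runtime_dict)  # {'ExtractNetStats': (1, 4)}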
Example #3
    def run(self, config_file=None, partic_list=None):
        """Establish where and how we're running the pipeline and set up the
        run. (Entry point)

        - This is the entry point for pipeline building and connecting.
          Depending on the inputs, the appropriate workflow runner will
          be selected and executed.

        :type config_file: str
        :param config_file: Filepath to the pipeline configuration file in
                            YAML format.
        :type partic_list: str
        :param partic_list: Filepath to the participant list file in YAML
                            format.
        """

        from time import strftime
        from qap_utils import raise_smart_exception, \
                              check_config_settings

        # in case we are overriding the stored configuration
        if config_file:
            from qap.script_utils import read_yml_file
            self._config = read_yml_file(config_file)
            self.validate_config_dict()
            self._config["pipeline_config_yaml"] = config_file

        if not self._config:
            raise Exception("config not found!")

        if partic_list:
            self._config["subject_list"] = partic_list

        # Get configurations and settings
        check_config_settings(self._config, "num_processors")
        check_config_settings(self._config, "num_sessions_at_once")
        check_config_settings(self._config, "available_memory")
        check_config_settings(self._config, "output_directory")
        check_config_settings(self._config, "working_directory")

        self._num_bundles_at_once = 1
        write_report = self._config.get('write_report', False)

        if "cluster_system" in self._config.keys() and not self._bundle_idx:
            res_mngr = self._config["cluster_system"]
            if (res_mngr == None) or ("None" in res_mngr) or \
                ("none" in res_mngr):
                self._platform = None
            else:
                platforms = ["SGE", "PBS", "SLURM"]
                self._platform = str(res_mngr).upper()
                if self._platform not in platforms:
                    msg = "The resource manager %s provided in the pipeline "\
                          "configuration file is not one of the valid " \
                          "choices. It must be one of the following:\n%s" \
                          % (self._platform, str(platforms))
                    raise_smart_exception(locals(), msg)
        else:
            self._platform = None

        # Create output directory
        try:
            os.makedirs(self._config["output_directory"])
        except OSError:
            if not op.isdir(self._config["output_directory"]):
                err = "[!] Output directory unable to be created.\n" \
                      "Path: %s\n\n" % self._config["output_directory"]
                raise Exception(err)

        # Create working directory
        try:
            os.makedirs(self._config["working_directory"])
        except OSError:
            if not op.isdir(self._config["working_directory"]):
                err = "[!] Working directory unable to be created.\n" \
                      "Path: %s\n\n" % self._config["working_directory"]
                raise Exception(err)

        results = []

        # set up callback logging
        import logging
        from nipype.pipeline.plugins.callback_log import log_nodes_cb

        cb_log_filename = os.path.join(self._config["output_directory"],
                                       "callback.log")
        # Add handler to callback log file
        cb_logger = logging.getLogger('callback')
        cb_logger.setLevel(logging.DEBUG)
        handler = logging.FileHandler(cb_log_filename)
        cb_logger.addHandler(handler)

        # settle run arguments (plugins)
        self.runargs = {}
        self.runargs['plugin'] = 'MultiProc'
        self.runargs['plugin_args'] = {
            'n_procs': self._config["num_processors"],
            'memory_gb': int(self._config["available_memory"]),
            'status_callback': log_nodes_cb}

        # load the participant list file into dictionary
        subdict = self.load_sublist()

        # flatten the participant dictionary
        self._sub_dict = self.create_session_dict(subdict)

        # create the list of bundles
        self._bundles_list = self.create_bundles()
        num_bundles = len(self._bundles_list)

        if not self._bundle_idx:
            # initialize the run-level log directory (not the bundle-level
            # one) only the first time the script runs, because of the
            # timestamp: if sub-nodes are kicked off by a batch file on a
            # cluster, we don't want a new timestamp for every node run
            self._run_log_dir = op.join(
                self._config['output_directory'],
                '_'.join([self._run_name, "logs"]), '_'.join(
                    [strftime("%Y%m%d_%H_%M_%S"),
                     "%dbundles" % num_bundles]))

        if self._run_log_dir:
            if not os.path.isdir(self._run_log_dir):
                try:
                    os.makedirs(self._run_log_dir)
                except OSError:
                    if not op.isdir(self._run_log_dir):
                        err = "[!] Log directory unable to be created.\n" \
                              "Path: %s\n\n" % self._run_log_dir
                        raise Exception(err)

        if num_bundles == 1:
            self._config["num_sessions_at_once"] = \
                len(self._bundles_list[0])

        # Start the magic
        if not self._platform and not self._bundle_idx:
            # not a cluster/grid run
            for idx in range(1, num_bundles + 1):
                results.append(self.run_one_bundle(idx))

        elif not self._bundle_idx:
            # there is a self._bundle_idx only if the pipeline runner is run
            # with bundle_idx as a parameter - only happening either manually,
            # or when running on a cluster
            self.submit_cluster_batch_file(num_bundles)

        else:
            # if there is a bundle_idx supplied to the runner
            results = self.run_one_bundle(self._bundle_idx)
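
A self-contained sketch of how plugin arguments like the runargs
assembled in this method drive a nipype workflow (the toy workflow and
paths are hypothetical; log_nodes_cb is imported here from
nipype.utils.profiler, its location in current nipype, whereas this
example uses the older nipype.pipeline.plugins.callback_log path):

import logging
from nipype import Node, Workflow
from nipype.interfaces.utility import Function
from nipype.utils.profiler import log_nodes_cb

# The 'callback' logger must be configured before the run, as above
cb_logger = logging.getLogger('callback')
cb_logger.setLevel(logging.DEBUG)
cb_logger.addHandler(logging.FileHandler('/tmp/callback.log'))

def add(a, b):
    return a + b

node = Node(Function(input_names=['a', 'b'], output_names=['out'],
                     function=add), name='adder')
node.inputs.a = 1
node.inputs.b = 2

wf = Workflow(name='demo', base_dir='/tmp')
wf.add_nodes([node])
wf.run(plugin='MultiProc',
       plugin_args={'n_procs': 2, 'memory_gb': 4,
                    'status_callback': log_nodes_cb})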
Example #4
def build_collect_workflow(args, retval):
    # os and pandas are used below but were missing from the imports
    import os
    import re
    import glob
    import warnings

    warnings.filterwarnings("ignore")
    import ast
    import pkg_resources
    from pathlib import Path
    import yaml
    import pandas as pd

    try:
        import pynets

        print(f"\n\nPyNets Version:\n{pynets.__version__}\n\n")
    except ImportError:
        print(
            "PyNets not installed! Ensure that you are using the correct python version."
        )

    # Unpack command-line arguments into local variables
    resources = args.pm
    if resources:
        procmem = list(eval(str(resources)))
    else:
        from multiprocessing import cpu_count

        nthreads = cpu_count()
        procmem = [int(nthreads), int(float(nthreads) * 2)]
    plugin_type = args.plug
    if isinstance(plugin_type, list):
        plugin_type = plugin_type[0]
    verbose = args.v
    working_path = args.basedir
    work_dir = args.work
    modality = args.modality

    os.makedirs(f"{str(Path(working_path).parent)}/all_visits_netmets_auc",
                exist_ok=True)

    wf = collect_all(working_path, modality)

    # Initialize before the try block so later references cannot hit a
    # NameError if parsing fails
    runtime_dict = {}
    execution_dict = {}
    with open(pkg_resources.resource_filename("pynets", "runconfig.yaml"),
              "r") as stream:
        try:
            hardcoded_params = yaml.safe_load(stream)
            for i in range(len(hardcoded_params["resource_dict"])):
                runtime_dict[list(hardcoded_params["resource_dict"][i].keys(
                ))[0]] = ast.literal_eval(
                    list(hardcoded_params["resource_dict"][i].values())[0][0])
            for i in range(len(hardcoded_params["execution_dict"])):
                execution_dict[list(
                    hardcoded_params["execution_dict"][i].keys())[0]] = list(
                        hardcoded_params["execution_dict"][i].values())[0][0]
        except yaml.YAMLError:
            # The file is already open here, so a parse error, not
            # FileNotFoundError, is what can occur
            print("Failed to parse runconfig.yaml")

    os.makedirs(f"{work_dir}{'/pynets_out_collection'}", exist_ok=True)
    wf.base_dir = f"{work_dir}{'/pynets_out_collection'}"

    if verbose is True:
        from nipype import config, logging

        cfg_v = dict(
            logging={
                "workflow_level": "DEBUG",
                "utils_level": "DEBUG",
                "interface_level": "DEBUG",
                "filemanip_level": "DEBUG",
                "log_directory": str(wf.base_dir),
                "log_to_file": True,
            },
            monitoring={
                "enabled": True,
                "sample_frequency": "0.1",
                "summary_append": True,
                "summary_file": str(wf.base_dir),
            },
        )
        # Apply the config first, then refresh nipype's loggers from it
        config.update_config(cfg_v)
        logging.update_logging(config)
        config.enable_debug_mode()
        config.enable_resource_monitor()

        # Re-import the stdlib logging module (shadows nipype's logging,
        # which is no longer needed in this branch)
        import logging

        callback_log_path = f"{wf.base_dir}/run_stats.log"
        logger = logging.getLogger("callback")
        logger.setLevel(logging.DEBUG)
        handler = logging.FileHandler(callback_log_path)
        logger.addHandler(handler)

    execution_dict["crashdump_dir"] = str(wf.base_dir)
    execution_dict["plugin"] = str(plugin_type)
    cfg = dict(execution=execution_dict)
    for key, settings in cfg.items():
        for setting, value in settings.items():
            wf.config[key][setting] = value
    try:
        wf.write_graph(graph2use="colored", format="png")
    except Exception:
        # Graph rendering needs graphviz; skip silently if unavailable
        pass
    if verbose is True:
        from nipype.utils.profiler import log_nodes_cb

        plugin_args = {
            "n_procs": int(procmem[0]),
            "memory_gb": int(procmem[1]),
            "status_callback": log_nodes_cb,
            "scheduler": "mem_thread",
        }
    else:
        plugin_args = {
            "n_procs": int(procmem[0]),
            "memory_gb": int(procmem[1]),
            "scheduler": "mem_thread",
        }
    print("%s%s%s" % ("\nRunning with ", str(plugin_args), "\n"))
    wf.run(plugin=plugin_type, plugin_args=plugin_args)
    if verbose is True:
        from nipype.utils.draw_gantt_chart import generate_gantt_chart

        print("Plotting resource profile from run...")
        generate_gantt_chart(callback_log_path, cores=int(procmem[0]))
        handler.close()
        logger.removeHandler(handler)

    files_ = glob.glob(
        f"{str(Path(working_path).parent)}/all_visits_netmets_auc/*clean.csv"
    )

    print("Aggregating dataframes...")
    dfs = []
    for file_ in files_:
        # Read in chunks and reassemble to bound peak memory per file
        df = pd.concat(pd.read_csv(file_, chunksize=100000),
                       ignore_index=True)
        try:
            # Drop stray index columns (e.g. 'Unnamed: 0') written by to_csv
            df.drop(df.filter(regex="Unname").columns, axis=1, inplace=True)
        except Exception:
            pass
        dfs.append(df)
        del df
    df_concat(dfs, working_path)

    return
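
A hypothetical invocation sketch: build_collect_workflow reads an
argparse-style namespace, so any object carrying the attributes used at
the top of the function will do (all values below are made up):

from types import SimpleNamespace

args = SimpleNamespace(
    pm="[4, 8]",                    # parsed into [n_procs, memory_gb]
    plug="MultiProc",
    v=False,
    basedir="/tmp/pynets_output",   # hypothetical path
    work="/tmp/pynets_work",
    modality="func",
)
# build_collect_workflow(args, retval={})  # would run the full workflow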
Example #5
    def run(self, number_of_cores=1, memory=None, save_profiler_log=False):
        """Execute the workflow of the super-resolution reconstruction pipeline.

        Nipype execution engine will take care of the management and execution of
        all processing steps involved in the super-resolution reconstruction pipeline.
        Note that the complete execution graph is saved as a PNG image so
        that the whole processing pipeline remains transparent.

        Parameters
        ----------
        number_of_cores : int
            Number of cores / CPUs used by the workflow

        memory : int
            Maximal memory (in GB) used by the workflow

        save_profiler_log : bool
            If `True`, generates the profiling callback log
            (Default: `False`)
        """
        from nipype import logging as nipype_logging

        # Use nipype.interface logger to print some information messages
        iflogger = nipype_logging.getLogger('nipype.interface')
        iflogger.info("**** Workflow graph creation ****")
        self.wf.write_graph(dotfilename='graph.dot',
                            graph2use='colored',
                            format='png',
                            simple_form=True)

        # Copy and rename the generated "graph.png" image
        src = os.path.join(self.wf.base_dir, self.wf.name, 'graph.png')
        if self.session is not None:
            dst = os.path.join(
                self.output_dir, '-'.join(["pymialsrtk", __version__]),
                self.subject, self.session, 'figures',
                f'{self.subject}_{self.session}_rec-SR_id-{self.sr_id}_desc-processing_graph.png'
            )
        else:
            dst = os.path.join(
                self.output_dir, '-'.join(["pymialsrtk", __version__]),
                self.subject, 'figures',
                f'{self.subject}_rec-SR_id-{self.sr_id}_desc-processing_graph.png'
            )
        # Create the figures/ and parent directories if they do not exist
        figures_dir = os.path.dirname(dst)
        os.makedirs(figures_dir, exist_ok=True)
        # Make the copy
        iflogger.info(f'\t > Copy {src} to {dst}...')
        shutil.copy(src=src, dst=dst)

        # Create dictionary of arguments passed to plugin_args
        args_dict = {
            'maxtasksperchild': 1,
            'raise_insufficient': False,
            'n_procs': number_of_cores
        }

        if (memory is not None) and (memory > 0):
            args_dict['memory_gb'] = memory

        if save_profiler_log:
            args_dict['status_callback'] = log_nodes_cb
            # Set path to log file and create callback logger
            callback_log_path = os.path.join(self.wf.base_dir, self.wf.name,
                                             'run_stats.log')
            import logging
            import logging.handlers
            logger = logging.getLogger('callback')
            logger.setLevel(logging.DEBUG)
            handler = logging.FileHandler(callback_log_path)
            logger.addHandler(handler)

        iflogger.info("**** Processing ****")
        # datetime object containing current start date and time
        start = datetime.now()
        self.run_start_time = start.strftime("%B %d, %Y / %H:%M:%S")
        print(f" Start date / time : {self.run_start_time}")

        # Execute the workflow
        res = self.wf.run(plugin='MultiProc', plugin_args=args_dict)

        # Copy and rename the workflow execution log
        src = os.path.join(self.wf.base_dir, "pypeline.log")
        if self.session is not None:
            dst = os.path.join(
                self.output_dir, '-'.join(["pymialsrtk", __version__]),
                self.subject, self.session, 'logs',
                f'{self.subject}_{self.session}_rec-SR_id-{self.sr_id}_log.txt'
            )
        else:
            dst = os.path.join(
                self.output_dir, '-'.join(["pymialsrtk",
                                           __version__]), self.subject, 'logs',
                f'{self.subject}_rec-SR_id-{self.sr_id}_log.txt')
        # Create the logs/ and parent directories if they do not exist
        logs_dir = os.path.dirname(dst)
        os.makedirs(logs_dir, exist_ok=True)
        # Make the copy
        iflogger.info(f'\t > Copy {src} to {dst}...')
        shutil.copy(src=src, dst=dst)

        # datetime object containing current end date and time
        end = datetime.now()
        self.run_end_time = end.strftime("%B %d, %Y / %H:%M:%S")
        print(f" End date / time : {self.run_end_time}")

        # Compute elapsed running time in minutes and seconds
        duration = end - start
        (minutes, seconds) = divmod(duration.total_seconds(), 60)
        self.run_elapsed_time = f'{int(minutes)} minutes and {int(seconds)} seconds'
        print(f" Elapsed time: {self.run_end_time}")

        iflogger.info("**** Write dataset derivatives description ****")
        for toolbox in ["pymialsrtk", "nipype"]:
            write_bids_derivative_description(bids_dir=self.bids_dir,
                                              deriv_dir=self.output_dir,
                                              pipeline_name=toolbox)

        if save_profiler_log:
            iflogger.info("**** Workflow execution profiling ****")
            iflogger.info('\t > Creating report...')
            generate_gantt_chart(logfile=callback_log_path,
                                 cores=number_of_cores,
                                 minute_scale=10,
                                 space_between_minutes=50,
                                 pipeline_name=os.path.join(
                                     self.wf.base_dir, self.wf.name))
            # Copy and rename the computational resources log
            src = os.path.join(self.wf.base_dir, self.wf.name,
                               "run_stats.log.html")
            if self.session is not None:
                dst = os.path.join(
                    self.output_dir, '-'.join(["pymialsrtk", __version__]),
                    self.subject, self.session, 'logs',
                    f'{self.subject}_{self.session}_rec-SR_id-{self.sr_id}_desc-profiling_log.html'
                )
            else:
                dst = os.path.join(
                    self.output_dir, '-'.join(["pymialsrtk", __version__]),
                    self.subject, 'logs',
                    f'{self.subject}_rec-SR_id-{self.sr_id}_desc-profiling_log.html'
                )
            # Make the copy
            iflogger.info(f'\t > Copy {src} to {dst}...')
            shutil.copy(src=src, dst=dst)

        iflogger.info("**** Super-resolution HTML report creation ****")
        self.create_subject_report()

        return res
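
The elapsed-time bookkeeping near the end of run() reduces to this
standalone pattern (sleep stands in for the workflow execution):

from datetime import datetime
from time import sleep

start = datetime.now()
sleep(1.5)  # stand-in for self.wf.run(...)
duration = datetime.now() - start
minutes, seconds = divmod(duration.total_seconds(), 60)
print(f'Elapsed time: {int(minutes)} minutes and {int(seconds)} seconds')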