示例#1
0
class PipeImporter(object):
    def __init__(self, pipe_template_dir, dbm, forTest=False):
        '''Collect all pipeline definitions of a pipeline project directory.

        Every ``*.json`` file located directly inside *pipe_template_dir* is
        parsed and stored in ``self.pipes``. The directory name serves as
        namespace for pipeline names and script names.

        Args:
            pipe_template_dir: Path to pipeline directory
            dbm: Database manager; its ``lostconfig`` attribute is handed
                to ``FileMan``.
            forTest: Stored as ``self.forTest``. When true, a failing
                ``import_pipe`` does not remove the pipeline project.
        '''
        self.forTest = forTest
        self.dbm = dbm
        self.file_man = FileMan(self.dbm.lostconfig)
        # Drop one trailing slash so basename() yields the directory name.
        src_dir = pipe_template_dir
        if src_dir.endswith('/'):
            src_dir = src_dir[:-1]
        self.src_pipe_template_path = src_dir
        project_dir_name = os.path.basename(self.src_pipe_template_path)
        self.dst_pipe_template_path = os.path.join(self.file_man.pipe_path,
                                                   project_dir_name)
        self.json_files = glob(os.path.join(src_dir, '*.json'))
        self.pipes = []
        self.namespace = project_dir_name.strip('/')
        for definition_file in self.json_files:
            with open(definition_file) as handle:
                pipe = json.load(handle)
            file_stem = os.path.splitext(os.path.basename(definition_file))[0]
            pipe['namespace'] = self.namespace
            pipe['name'] = self._namespaced_name(file_stem)
            self.pipes.append(pipe)
            # Set name to name of the script file
            for element in pipe['elements']:
                if 'script' not in element:
                    continue
                script_def = element['script']
                script_def['name'] = self._namespaced_name(script_def['path'])
        self.checker = PipeDefChecker(logging)

    def _namespaced_name(self, name):
        '''Return *name* prefixed with this project's namespace and a dot.'''
        return '{ns}.{item}'.format(ns=self.namespace, item=name)

    def update_pipe_project(self):

        if os.path.exists(self.dst_pipe_template_path):
            logging.info('\n\n++++++++++++++++++++++\n\n')
            for pipe in self.pipes:
                if not self.checker.check(pipe):
                    logging.error('Pipeline was not updated!')
                    return False
            for pipe in self.pipes:
                self.update_pipe(pipe)
            dir_util.copy_tree(self.src_pipe_template_path,
                               self.dst_pipe_template_path)

            logging.info(
                "Copyed pipeline template dir from %s to %s" %
                (self.src_pipe_template_path, self.dst_pipe_template_path))
        else:
            logging.warning(('Cannot update. No such pipe project: *{}*. '
                             'Maybe you want to import a pipeline instead '
                             'of updating it.').format(self.namespace))

    def update_pipe(self, pipe):
        for db_pipe in self.dbm.get_all_pipeline_templates():
            db_json = json.loads(db_pipe.json_template)
            # update pipeline if already present in db
            if db_json['name'].lower() == pipe['name'].lower():
                # Do everything relative from pipeline definition file path.
                oldwd = os.getcwd()
                os.chdir(self.src_pipe_template_path)
                logging.info('Updated pipeline: {}'.format(db_json['name']))
                for pe_j in pipe['elements']:
                    if 'script' in pe_j:
                        element_j = pe_j['script']
                        script = parse_script(element_j)
                        db_script = self.dbm.get_script(
                            name=self._get_script_name(script))
                        script_arguments = get_default_script_arguments(
                            script.path)
                        script_envs = get_default_script_envs(script.path)
                        script_resources = get_default_script_resources(
                            script.path)
                        if 'arguments' in element_j:
                            for arg in element_j['arguments']:
                                if arg not in script_arguments:
                                    logging.error(
                                        "Invalid argument >> {} << in pipeline definition json"
                                        .format(arg))
                                    valid_args = ""
                                    for v_arg in script_arguments:
                                        valid_args += ">> {} <<\n".format(
                                            v_arg)
                                    logging.error(
                                        "Valid arguments are: \n{}".format(
                                            valid_args[:-1]))
                                    raise Exception(
                                        'Invalid arguments. Start Cleanup')
                        if db_script is None:
                            self.dbm.add(script)
                            self.dbm.commit()
                            script_out_path = os.path.join(
                                self.dst_pipe_template_path, script.path)
                            script.path = self.file_man.make_path_relative(
                                script_out_path)
                            script.arguments = json.dumps(script_arguments)
                            script.envs = json.dumps(script_envs)
                            script.resources = json.dumps(script_resources)
                            self.dbm.save_obj(script)
                            logging.info("Added script to database")
                        else:
                            script_out_path = os.path.join(
                                self.dst_pipe_template_path, script.path)
                            db_script.path = self.file_man.make_path_relative(
                                script_out_path)
                            db_script.arguments = json.dumps(script_arguments)
                            db_script.envs = json.dumps(script_envs)
                            db_script.description = script.description
                            db_script.resources = json.dumps(script_resources)
                            self.dbm.save_obj(db_script)
                            logging.info('Updated script: {}'.format(
                                db_script.name))
                    # self._fix_sia_config(pe_j)
                db_pipe.json_template = json.dumps(pipe)
                self.dbm.save_obj(db_pipe)
                os.chdir(oldwd)  # Change dir back to old working directory.
                return True
        # import pipe if not already present in database
        self.import_pipe(pipe)

    def _get_script_name(self, script):
        '''Return the namespaced file name (without directories) of *script*.'''
        file_name = os.path.split(script.path)[1]
        return self._namespaced_name(file_name)

    def start_import(self):
        logging.info('\n\n++++++++++++++++++++++ \n\n')
        logging.info('Start pipe project import for: {}'.format(
            self.src_pipe_template_path))
        for pipe in self.pipes:
            if not self.checker.check(pipe):
                logging.error(
                    'Wrong pipeline definition! Did not import pipe project!')
                return False
        if os.path.exists(self.dst_pipe_template_path):
            logging.warning('Cannot import pipeline!')
            logging.warning('Pipe Template Dir already exist: {}'.format(
                self.dst_pipe_template_path))
            return
        dir_util.copy_tree(self.src_pipe_template_path,
                           self.dst_pipe_template_path)
        logging.info(
            "Copyed pipeline template dir from %s to %s" %
            (self.src_pipe_template_path, self.dst_pipe_template_path))
        for pipe in self.pipes:
            self.import_pipe(pipe)

    def import_pipe(self, pipe):
        try:
            logging.info('\n---\n')
            # Do everything relative from pipeline definition file path.
            oldwd = os.getcwd()
            os.chdir(self.src_pipe_template_path)
            for db_pipe in self.dbm.get_all_pipeline_templates():
                db_json = json.loads(db_pipe.json_template)
                if db_json['name'].lower() == pipe['name'].lower():
                    logging.warning("PipeTemplate in database.")
                    logging.warning("Name of this template is: %s" %
                                    (pipe['name'], ))
                    logging.warning("Will not import PipeTemplate.")
                    return db_pipe.idx
            for pe_j in pipe['elements']:
                if 'script' in pe_j:
                    element_j = pe_j['script']
                    script = parse_script(element_j)
                    db_script = self.dbm.get_script(
                        name=self._get_script_name(script))
                    script_arguments = get_default_script_arguments(
                        script.path)
                    script_envs = get_default_script_envs(script.path)
                    script_resources = get_default_script_resources(
                        script.path)
                    if 'arguments' in element_j:
                        for arg in element_j['arguments']:
                            if arg not in script_arguments:
                                logging.error(
                                    "Invalid argument >> {} << in pipeline definition json"
                                    .format(arg))
                                valid_args = ""
                                for v_arg in script_arguments:
                                    valid_args += ">> {} <<\n".format(v_arg)
                                logging.error(
                                    "Valid arguments are: \n{}".format(
                                        valid_args[:-1]))
                                raise Exception(
                                    'Invalid arguments. Start Cleanup')
                    if db_script is None:
                        self.dbm.add(script)
                        self.dbm.commit()
                        script_out_path = os.path.join(
                            self.dst_pipe_template_path, script.path)
                        script.path = self.file_man.make_path_relative(
                            script_out_path)
                        script.arguments = json.dumps(script_arguments)
                        script.envs = json.dumps(script_envs)
                        script.resources = json.dumps(script_resources)
                        self.dbm.save_obj(script)
                        logging.info("Added script to database\n")
                    else:
                        logging.warning(
                            "Script is already present in database.\n")
                        logging.warning((str(db_script.idx), db_script.name,
                                         db_script.path))
                # self._fix_sia_config(pe_j)
            pipe_temp = model.PipeTemplate(json_template=json.dumps(pipe),
                                           timestamp=datetime.now())
            self.dbm.save_obj(pipe_temp)
            logging.info("Added Pipeline: *** %s ***" % (pipe['name'], ))
            os.chdir(oldwd)  # Change dir back to old working directory.
            return pipe_temp.idx
        except Exception as e:
            logging.error(e, exc_info=True)
            if not self.forTest:
                self.remove_pipe_project()
            logging.error('Cleanup successful. Removed buggy pipeline.')

    def remove_pipe_project(self):
        '''Remove an imported pipeline project from lost system.

        Note:
            Pipeline folder in LOST filesystem and all related db
            entrys will be deleted.
        '''
        clean_filesystem = True
        for pipe in self.pipes:
            if not self.remove_pipeline(pipe):
                clean_filesystem = False
        if clean_filesystem:
            shutil.rmtree(self.dst_pipe_template_path)
            logging.info(
                'Removed pipeline project from lost filesystem {}'.format(
                    self.dst_pipe_template_path))
            logging.info(
                'Whole pipeline project {} was successfull removed'.format(
                    self.namespace))
        else:
            logging.info('''Pipeline project {} was not completely removed 
                since some pipes are still in use'''.format(self.namespace))

    def remove_pipeline(self, pipe):
        '''Remove all related db entrys of a pipeline from lost database.
        '''
        #TODO: Remove script
        for db_pipe in self.dbm.get_all_pipeline_templates():
            db_json = json.loads(db_pipe.json_template)
            if db_json['name'].lower() == pipe['name'].lower():
                t = self.dbm.get_pipe(pipe_template_id=db_pipe.idx)
                if t is None:
                    for pe_j in db_json['elements']:
                        if 'script' in pe_j:
                            script_man.remove_script(
                                self.dbm,
                                os.path.join(
                                    self.namespace,
                                    os.path.basename(pe_j['script']['path'])))
                    self.dbm.delete(db_pipe)
                else:
                    logging.warning(
                        "Cannot remove pipeline. It is already in use by task with ID: %s"
                        % (t.idx, ))
                    return False
                self.dbm.commit()
                logging.info("Removed pipeline successfull: {}".format(
                    pipe['name']))
                return True
        return True
示例#2
0
class Script(pe_base.Element):
    '''Superclass for a user defined Script.

    Custom scripts need to inherit from Script and implement the main method.

    Attributes:
        pe_id (int): Pipe element id. Assign the pe id of a pipline script
            in order to emulate this script in a jupyter notebook for example.
    '''
    def __init__(self, pe_id=None):
        '''Connect this script to its pipeline element.

        Args:
            pe_id (int): Pipe element id. If None, the id is read from the
                ``--idx`` command line argument and the script is executed
                right away via :meth:`_run`. If given, the element is only
                loaded (e.g. for use in a notebook) and nothing is executed.
        '''
        started_from_cli = pe_id is None
        if started_from_cli:
            parser = argparse.ArgumentParser(
                description='A user defined script.')
            parser.add_argument('--idx',
                                nargs='?',
                                action='store',
                                help='Id of related pipeline element.')
            args = parser.parse_args()
        lostconfig = LOSTConfig()
        dbm = access.DBMan(lostconfig)
        self.file_man = FileMan(fs_db=dbm.get_fs(name='lost_data'))
        self._dbm = dbm  #type: lost.db.access.DBMan
        element_id = int(args.idx) if started_from_cli else pe_id
        pe = dbm.get_pipe_element(element_id)
        super().__init__(pe, dbm)
        logfile_path = self.file_man.get_pipe_log_path(self._pipe.idx)
        self._log_stream = self.file_man.fs.open(logfile_path, 'a')
        logger_name = os.path.basename(pe.script.path)
        self._logger = log.get_stream_logger(logger_name, self._log_stream)
        # Register the log file for the pipe if none is stored yet.
        logfile_known = (self.pipe_info.logfile_path is not None
                         and self.pipe_info.logfile_path)
        if not logfile_known:
            self.pipe_info.logfile_path = self.get_rel_path(logfile_path)
        self._inp = inout.Input(self)
        self._outp = inout.ScriptOutput(self)
        self.rejected_execution = False
        # Without a pe_id we have a normal script that is executed now;
        # with a pe_id e.g. a JupyterNotebook uses this script.
        if started_from_cli:
            return self._run()

    def _run(self, ret_success=False):
        try:
            self.main()
            self.i_am_done()
            success = 'PipeElementID: {}, Successfully executed script: {}'.format(
                self._pipe_element.idx, self._pipe_element.script.name)
            self._dbm.close_session()
            if ret_success:
                return success
        except:
            err_msg = str(datetime.datetime.now()) + '\n'
            err_msg += traceback.format_exc()
            self.report_err(err_msg)
            self._dbm.close_session()

    def __str__(self):
        my_str = 'I am a Script.\nMy name is: {}\nPipeElementID: {}'.format(
            self._pipe_element.script.name, self._pipe_element.idx)
        return my_str

    def main(self):
        #raise NotImplementedError("You need to implement a main method to get your Script running.")
        pass

    @property
    def logger(self):
        ''':class:`logging.Logger`: A standard python logger for this script. 
        
        It will log to the pipline log file.
        '''
        return self._logger

    @property
    def inp(self):
        ''':class:`lost.pyapi.inout.Input`
        '''
        return self._inp  #type: inout.Input

    @property
    def outp(self):
        ''':class:`lost.pyapi.inout.ScriptOutput`
        '''
        return self._outp  #type: inout.ScriptOutput

    def get_rel_path(self, path):
        '''Get relativ path for current project

        Args:
            path (str): A absolute path

        Returns:
            str : Relative path
        '''
        return self.file_man.get_rel_path(path)

    def get_label_tree(self, name):
        '''Get a LabelTree by name.
        
        Args:
            name (str): Name of the desired LabelTree.
        
        Retruns:
            :class:`lost.logic.label.LabelTree` or None: 
                If a label tree with the given name exists 
                it will be returned. Otherwise None
                will be returned'''
        group_id = self._pipe.group_id
        root_list = self._dbm.get_all_label_trees(group_id, add_global=True)
        root = next(filter(lambda x: x.name == name, root_list), None)
        if root is None:
            return None
        else:
            return LabelTree(self._dbm, root_leaf=root)

    def create_label_tree(self, name, external_id=None):
        '''Create a new LabelTree and its root leaf.

        Args:
            name (str): Name of the tree / name of the root leaf.
            external_id (str): An external id for the root leaf.

        Returns:
            :class:`lost.logic.label.LabelTree`:
                The created LabelTree.
        '''
        new_tree = LabelTree(self._dbm)
        new_tree.create_root(name, external_id=external_id)
        return new_tree

    def get_abs_path(self, path):
        '''Get absolute path in current file system.

        Args:
            path (str): A relative path.

        Returns:
            str: Absolute path
        '''
        return self.file_man.get_abs_path(path)

    def break_loop(self):
        '''Break next loop in pipeline.
        '''
        loop_e = self._pipe_man.get_next_loop(self._pipe_element)
        if loop_e is not None:
            loop_e.loop.break_loop = True
        self._dbm.add(loop_e)

    def loop_is_broken(self):
        '''Check if the current loop is broken'''
        loop_e = self._pipe_man.get_next_loop(self._pipe_element)
        if loop_e is not None:
            return loop_e.loop.break_loop
        else:
            self.logger.warning(
                'loop_is_broken method was used, but no loop seems to be in this pipeline!'
            )
            return False

    def get_arg(self, arg_name):
        '''Get argument value by name for this script.

        Args:
            arg_name (str): Name of the argument.

        Returns:
            Value of the given argument.
        '''
        if self._pipe_element.arguments:
            args = json.loads(self._pipe_element.arguments)
            # args = ast.literal_eval(self._pipe_element.arguments)
            my_arg = args[arg_name]['value']
            if my_arg in ['t', 'true', 'yes']:
                return True
            if my_arg in ['f', 'false', 'no']:
                return False
            if my_arg in ['-', '', '[]']:
                return None
            try:
                return ast.literal_eval(my_arg)
            except:
                return my_arg

        else:
            return None

    def get_filesystem(self, name=None):
        '''Get default lost filesystem or a specific filesystem by name.

        Returns:
            fsspec.spec.AbstractFileSystem: See https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem
        '''
        if name is None:
            return self.file_man.fs
        #TODO: Check if pipeline user is permitted to load fs
        fs_db = self._dbm.get_fs(name=name)
        fm = FileMan(fs_db=fs_db)
        return fm.fs

    def get_path(self, file_name, context='instance', ptype='abs'):
        '''Get path for the filename in a specific context in filesystem.

        Args:
            file_name (str): Name or relative path for a file.
            context (str): Options: *instance*, *pipe*, *static*
            ptype (str): Type of this path. Can be relative or absolute
                Options: *abs*, *rel*

        Returns:
            str: Path to the file in the specified context.
        '''
        if context == 'instance':
            path = os.path.join(self.instance_context, file_name)
        elif context == 'pipe':
            path = os.path.join(self.pipe_context, file_name)
        elif context == 'static':
            path = os.path.join(self.static_context, file_name)
        else:
            raise Exception('Unknown context: {}'.format(context))
        if ptype == 'abs':
            return path
        elif ptype == 'rel':
            return self.get_rel_path(path)
        else:
            raise Exception('Unknown argument ptype: {}'.format(ptype))

    @property
    def iteration(self):
        '''int: Get the current iteration.

        Number of times this script has been executed.
        '''
        return self._pipe_element.iteration

    @property
    def instance_context(self):
        '''str: Get the path to store files that are only valid for this instance.
        '''
        abs_path = self.file_man.create_instance_path(self._pipe_element)
        rel_path = self.file_man.make_path_relative(abs_path)
        self._pipe_element.instance_context = rel_path
        self._dbm.add(self._pipe_element)
        return abs_path

    @property
    def pipe_context(self):
        '''str: Root path to store files that should be visible for all elements
        in the pipeline.
        '''
        return self.file_man.get_pipe_context_path(self._pipe_element)

    @property
    def static_context(self):
        '''str: Get the static path.

        Files that are stored at this path can be accessed by all instances of a
        script.
        '''
        #TODO: Check how to handle different filesystem!
        return os.path.join(self._lostconfig.app_path,
                            os.path.split(self._pipe_element.script.path)[0])

    @property
    def progress(self):
        '''float: Get current progress that is displayed in the progress bar of this script.

        Current progress in percent 0...100
        '''
        return self._pipe_element.progress

    def update_progress(self, value):
        '''Update the progress for this script.

        Args:
            value (float): Progress in percent 0...100
        '''
        self._pipe_element.progress = value
        self._dbm.commit()

    def reject_execution(self):
        '''Reject execution of this script and set it to PENDING again.

        Note:
            This method is useful if you want to execute this script only
            when some condition based on previous pipeline elements is 
            meet.
        '''
        self.rejected_execution = True

    def get_alien_element(self, pe_id):
        '''Load a pipeline element by id and wrap it in its pyapi class.

        It is an alien element since it is most likely not part of the
        pipeline instance this script belongs to.

        Args:
            pe_id (int): PipeElementID of the alien element.

        Returns:
            * :class:`lost.pyapi.script.Script`
            * :class:`lost.pyapi.pipe_elements.AnnoTask`
            * :class:`lost.pyapi.pipe_elements.Datasource`
            * :class:`lost.pyapi.pipe_elements.VisualOutput`
            * :class:`lost.pyapi.pipe_elements.DataExport`
            * :class:`lost.pyapi.pipe_elements.Loop`

        Raises:
            Exception: If the element has none of the known types.
        '''
        pe = self._dbm.get_pipe_element(pe_id)
        kind = pe.dtype

        if kind == dtype.PipeElement.SCRIPT:
            return Script(pe_id=pe_id)
        if kind == dtype.PipeElement.ANNO_TASK:
            return pipe_elements.AnnoTask(pe, self._dbm)
        if kind == dtype.PipeElement.DATASOURCE:
            return pipe_elements.Datasource(pe, self._dbm)
        if kind == dtype.PipeElement.VISUALIZATION:
            return pipe_elements.VisualOutput(pe, self._dbm)
        if kind == dtype.PipeElement.DATA_EXPORT:
            return pipe_elements.DataExport(pe, self._dbm)
        if kind == dtype.PipeElement.LOOP:
            return pipe_elements.Loop(pe, self._dbm)
        raise Exception('Unknown pipe element type!')

    def i_am_done(self):
        '''Write the final state of this script run to the database.

        * If execution was rejected, the element is set to PENDING.
        * If the element is not in debug mode, it is set to FINISHED with
          100% progress and the pipe to IN_PROGRESS.
        * In debug mode the user is asked whether debugging is finished;
          on any answer not starting with ``y`` (including an empty one)
          the script output is cleaned up instead.

        The log stream is closed at the end of every path.
        '''
        if self.rejected_execution:
            self._pipe_element.state = state.PipeElement.PENDING
            self._dbm.add(self._pipe)
            self._dbm.add(self._pipe_element)
            self._dbm.commit()
            # Close the stream here as well; this path used to return
            # with the log file still open.
            self._log_stream.close()
            return

        #Save all changes to database
        # NOTE(review): only an explicit False counts as "not debugging";
        # a None value takes the interactive branch - confirm intent.
        if self._pipe_element.is_debug_mode == False:  # noqa: E712
            self._stage_finished_state()
            self._dbm.commit()
        else:
            answer = input("Have you finished debugging? [y/n]: ")
            # Slicing instead of indexing: an empty answer means "no"
            # rather than raising IndexError.
            if answer[:1].lower() == 'y':
                self._stage_finished_state()
            else:
                self.outp.clean_up()
            self._pipe_man.pipe.state = state.Pipe.IN_PROGRESS
            self._dbm.commit()
        self._log_stream.close()

    def _stage_finished_state(self):
        '''Mark element FINISHED / pipe IN_PROGRESS and add both to the session.'''
        self._pipe_element.state = state.PipeElement.FINISHED
        self._pipe_element.progress = 100.0
        self._pipe.state = state.Pipe.IN_PROGRESS
        self._dbm.add(self._pipe)
        self._dbm.add(self._pipe_element)

    def report_err(self, msg):
        '''Log an error of this user script and report it to the portal.

        The message is written to the script logger and then handed to
        ``report_script_err`` together with pipe element, pipe and database
        manager.

        Args:
            msg: The error message that should be reported.

        Note:
            You can call this method multiple times if you like. All messages
            will be concatenated and sent to the portal.
        '''
        self.logger.error(msg)
        element, pipe = self._pipe_element, self._pipe
        report_script_err(element, pipe, self._dbm, msg)