Example #1
class Source:
    """
    Representation of a collection with the data collected by source code analysis. The collection contains
    information about functions, variable initializations, a function call graph and macros.
    """
    def __init__(self, logger, conf, abstract_task):
        """
        Setup initial attributes and get logger object.

        :param logger: Logging object.
        :param conf: Source code analysis configuration (a dictionary with configuration properties).
        :param abstract_task: Abstract verification task dictionary (given by VTG).
        """
        self.logger = logger
        self._conf = conf
        self._source_functions = dict()
        self._source_vars = dict()
        self._macros = dict()
        self.__function_calls_cache = dict()

        # Initialize Clade client to make requests
        self._clade = Clade(self._conf['build base'])

        # Ask for dependencies for each CC
        cfiles, files_map = self._collect_file_dependencies(abstract_task)

        # Read files with the source code analysis results
        self._import_code_analysis(cfiles, files_map)

    @property
    def source_functions(self):
        """
        Return a list of function names.

        :return: function names list.
        """
        return list(self._source_functions.keys())

    def get_source_function(self, name, path=None, declaration=None):
        """
        Provides the function by a given name from the collection.

        :param name: Function name.
        :param path: File where the function should be declared or defined.
        :param declaration: Declaration object representing the function of interest.
        :return: Function object or None.
        """
        name = self.refined_name(name)
        if name and name in self._source_functions:
            if path and path in self._source_functions[name]:
                return self._source_functions[name][path]
            else:
                functions = self.get_source_functions(name,
                                                      declaration=declaration)
                if len(functions) == 1:
                    return functions[0]
                elif len(functions) > 1:
                    raise ValueError(
                        "There are several definitions of function {!r} in provided code you must specify "
                        "scope".format(name))
        return None
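
    # Lookup sketch (function and file names here are hypothetical):
    # get_source_function('probe', 'drivers/a.c') returns the object registered
    # for that file; without a matching path it falls back to
    # get_source_functions() and succeeds only if the name is unambiguous,
    # raising ValueError when several definitions match.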

    def get_source_functions(self, name, declaration=None):
        """
        Provides all functions found by a given name from the collection.

        :param name: Function name.
        :param declaration: Declaration object representing the function of interest.
        :return: List with Function objects.
        """
        name = self.refined_name(name)
        result = []
        if name and name in self._source_functions:
            for func in self._source_functions[name].values():
                if func not in result and (
                        not declaration or
                        declaration.compare(func.declaration)):
                    result.append(func)
        return result

    def set_source_function(self, new_obj, path):
        """
        Replace a Function object in the collection.

        :param new_obj: Function object.
        :param path: File where the function should be declared or defined.
        :return: None.
        """
        if new_obj.name not in self._source_functions:
            self._source_functions[new_obj.name] = dict()
        self._source_functions[new_obj.name][path] = new_obj

    def remove_source_function(self, name):
        """
        Delete the function from the collection.

        :param name: Function name.
        :return: None.
        """
        del self._source_functions[name]

    @property
    def source_variables(self):
        """
        Return a list of global variable names.

        :return: Variable names list.
        """
        return list(self._source_vars.keys())

    def get_source_variable(self, name, path=None):
        """
        Provides a global variable by a given name and scope file from the collection.

        :param name: Variable name.
        :param path: File with the variable declaration or initialization.
        :return: Variable object or None.
        """
        name = self.refined_name(name)
        if name and name in self._source_vars:
            if path and path in self._source_vars[name]:
                return self._source_vars[name][path]
            else:
                variables = self.get_source_variables(name)
                if len(variables) == 1:
                    return variables[0]
        return None

    def get_source_variables(self, name):
        """
        Provides all global variables by a given name from the collection.

        :param name: Variable name.
        :return: List with Variable objects.
        """
        name = self.refined_name(name)
        result = []
        if name and name in self._source_vars:
            for var in self._source_vars[name].values():
                if var not in result:
                    result.append(var)
        return result

    def set_source_variable(self, new_obj, path):
        """
        Replace an object in the global variables collection.

        :param new_obj: Variable object.
        :param path: File with the variable declaration or initialization.
        :return: None.
        """
        if new_obj.name not in self._source_vars:
            self._source_vars[new_obj.name] = dict()
        self._source_vars[new_obj.name][path] = new_obj

    def remove_source_variable(self, name):
        """
        Delete the global variable from the collection.

        :param name: Variable name.
        :return: None.
        """
        del self._source_vars[name]

    def get_macro(self, name):
        """
        Provides a macro by a given name from the collection.

        :param name: Macro name.
        :return: Macro object or None.
        """
        if name in self._macros:
            return self._macros[name]
        else:
            return None

    def set_macro(self, new_obj):
        """
        Set or replace an object in the macros collection.

        :param new_obj: Macro object.
        :return: None.
        """
        self._macros[new_obj.name] = new_obj

    def remove_macro(self, name):
        """
        Delete the macro from the collection.

        :param name: Macro name.
        :return: None.
        """
        del self._macros[name]

    @staticmethod
    def refined_name(call):
        """
        Resolve a function name from simple expressions that contain an explicit function name, such as '& myfunc',
        '(myfunc)', '(& myfunc)' or 'myfunc'.

        :param call: An expression string.
        :return: Extracted function name string.
        """
        name_re = re.compile(r"\(?\s*&?\s*(\w+)\s*\)?$")
        match = name_re.fullmatch(call)
        if match:
            return match.group(1)
        else:
            return None
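
    # Examples of what refined_name() extracts (illustrative values):
    #   'myfunc'      -> 'myfunc'
    #   '& myfunc'    -> 'myfunc'
    #   '(& myfunc)'  -> 'myfunc'
    #   'myfunc(arg)' -> None (not a bare function reference)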

    def _import_code_analysis(self, cfiles, dependencies):
        """
        Read global variables, functions and macros to fill up the collection.

        :param cfiles: C files of the analyzed program fragment.
        :param dependencies: Dictionary that maps each included file to the main C files that include it.
        :return: None.
        """
        # Import typedefs if they are provided
        self.logger.info("Extract complete type definitions")
        typedef = self._clade.get_typedefs(
            set(dependencies.keys()).union(cfiles))
        if typedef:
            import_typedefs(typedef, dependencies)

        variables = self._clade.get_variables(cfiles)
        if variables:
            self.logger.info("Import global variables initializations")
            for path, vals in variables.items():
                for variable in vals:
                    variable_name = extract_name(variable['declaration'])
                    if not variable_name:
                        raise ValueError('Global variable without a name')
                    var = Variable(variable_name, variable['declaration'])

                    # Here we know that if we meet a variable in another file then it is another variable, because
                    # a program should contain a single initialization of each global variable
                    self.set_source_variable(var, path)
                    var.declaration_files.add(path)
                    var.initialization_file = path
                    var.static = is_static(variable['declaration'])

                    if 'value' in variable:
                        var.value = variable['value']

        # Variables that are used in variable initializations
        self.logger.info("Import source functions")
        vfunctions = self._clade.get_used_in_vars_functions()

        # Get functions that are defined in dependencies or in the main files and that have calls
        cg = self._clade.get_callgraph(set(dependencies.keys()))

        # Function scope definitions
        # todo: maybe this should be fixed in Clade
        # Since we will not get definitions for library functions if they are in compiled parts, we should add all
        # scopes that are given for all functions called from outside of the code we analyze
        for scope in (s for s in cfiles if s in cg):
            for func in (f for f in cg[scope] if cg[scope][f].get('calls')):
                for dep in cg[scope][func].get('calls'):
                    dependencies.setdefault(dep, set())
                    dependencies[dep].add(scope)
        fs = self._clade.get_functions_by_file(
            set(dependencies.keys()).union(cfiles))

        # Add called functions
        for scope in cg:
            for func in cg[scope]:
                desc = cg[scope][func]
                if scope in cfiles:
                    # Definition of the function is in the code of interest
                    self._add_function(func, scope, fs, dependencies, cfiles)
                    # Add called functions
                    for def_scope, cf_desc in desc.get('calls',
                                                       dict()).items():
                        if def_scope not in cfiles:
                            for called_func in (
                                    f for f in cf_desc
                                    if def_scope in fs and f in fs[def_scope]):
                                self._add_function(called_func, def_scope, fs,
                                                   dependencies, cfiles)

                elif ('called_in' in desc
                      and set(desc['called_in'].keys()).intersection(cfiles)
                      ) or func in vfunctions:
                    if scope in fs and func in fs[scope]:
                        # Function is called in the target code but defined in dependencies
                        self._add_function(func, scope, fs, dependencies,
                                           cfiles)
                    elif scope != 'unknown':
                        self.logger.warning(
                            "There is no information on declarations of function {!r} from {!r} scope"
                            .format(func, scope))
        # Add functions missed in the call graph
        for scope in (s for s in fs if s in cfiles):
            for func in fs[scope]:
                func_intf = self.get_source_function(func, scope)
                if not func_intf:
                    self._add_function(func, scope, fs, dependencies, cfiles)

        for func in self.source_functions:
            for obj in self.get_source_functions(func):
                scopes = set(obj.declaration_files).union(set(
                    obj.header_files))
                if not obj.definition_file:
                    # It is likely to be this way
                    scopes.add('unknown')
                for scope in (s for s in scopes
                              if cg.get(s, dict()).get(func)):
                    for cscope, desc in ((s, d)
                                         for s, d in cg[scope][func].get(
                                             'called_in', {}).items()
                                         if s in cfiles):
                        for caller in desc:
                            for line in desc[caller]:
                                params = desc[caller][line].get('args')
                                caller_intf = self.get_source_function(
                                    caller, cscope)
                                obj.add_call(caller, cscope)

                                if params:
                                    # There can be functions here which are not defined or visible
                                    for _, passed_func in list(params):
                                        passed_obj = self.get_source_function(
                                            passed_func, cscope)
                                        if not passed_obj:
                                            passed_scope = self._search_function(
                                                passed_func, cscope, fs)
                                            if passed_scope:
                                                self._add_function(
                                                    passed_func, passed_scope,
                                                    fs, dependencies, cfiles)
                                            else:
                                                self.logger.warning(
                                                    "Cannot find function {!r} from scope {!r}"
                                                    .format(
                                                        passed_func, cscope))
                                                # Ignore this call since the model will not be correct without the signature
                                                params = None
                                                break
                                    caller_intf.call_in_function(obj, params)

        macros_file = get_conf_property(self._conf['source analysis'],
                                        'macros white list')
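        # The white list is assumed to be a JSON collection of macro names,
        # e.g. ["HYPOTHETICAL_MACRO"]; only expansions of the listed macros
        # are imported below.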
        if macros_file:
            macros_file = find_file_or_dir(
                self.logger, self._conf['main working directory'], macros_file)
            with open(macros_file, 'r', encoding='utf8') as fp:
                white_list = ujson.load(fp)
            if white_list:
                macros = self._clade.get_macros_expansions(cfiles, white_list)
                for path, macros in macros.items():
                    for macro, desc in macros.items():
                        obj = self.get_macro(macro)
                        if not obj:
                            obj = Macro(macro)
                        for call in desc.get('args', []):
                            obj.add_parameters(path, call)
                        self.set_macro(obj)

    def _search_function(self, func_name, some_scope, fs):
        # Be aware of this function - it is costly
        if some_scope in fs and func_name in fs[some_scope]:
            return some_scope
        elif 'unknown' in fs and func_name in fs['unknown']:
            return 'unknown'
        else:
            for s in (s for s in fs if func_name in fs[s]):
                return s
        return None

    def _add_function(self, func, scope, fs, deps, cfiles):
        fs_desc = fs[scope][func]
        if scope == 'unknown':
            key = list(fs_desc['declarations'].keys())[0]
            signature = fs_desc['declarations'][key]['signature']
            func_intf = Function(func, signature)
            # Do not set definition file since it is out of scope of the target program fragment
        else:
            signature = fs_desc.get('signature')
            func_intf = Function(func, signature)
            func_intf.definition_file = scope

        # Set static
        if fs_desc.get('type') == "static":
            func_intf.static = True
        else:
            func_intf.static = False

        # Add declarations
        files = {func_intf.definition_file
                 } if func_intf.definition_file else set()
        if fs_desc['declarations']:
            files.update({
                f
                for f in fs_desc['declarations']
                if f != 'unknown' and f in deps
            })
        for file in files:
            if file not in cfiles and file not in func_intf.header_files:
                func_intf.header_files.append(file)
            for cfile in deps[file]:
                self.set_source_function(func_intf, cfile)
                func_intf.declaration_files.add(cfile)
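
    # Bookkeeping sketch: every declaring file that is not a C file of interest
    # goes to header_files, while the C files that include it (deps[file])
    # become declaration_files and the keys under which the Function object is
    # registered in the collection.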

    def _collect_file_dependencies(self, abstract_task):
        """
        Collect for each included header or C file the "main" C files into which it is included. This is required
        since we cannot write aspects for and instrument files that have no CC command, so we build this map.

        :param abstract_task: Abstract task dictionary.
        :return: Collection dictionary {included file: {files that include this one}}.
        """
        collection = dict()
        c_files = set()

        def _collect_cc_deps(cfile, deps):
            # Collect for each file the CC entries into which it is included
            for file in deps:
                if file not in collection:
                    collection[file] = set()
                collection[file].add(cfile)

        # Read each CC description and build the map from included files to 'in' files
        for group in abstract_task['grps']:
            for desc in group['Extra CCs']:
                cc_desc = self._clade.get_cmd(desc['CC'])
                c_file = cc_desc['in'][0]
                # Now read deps
                _collect_cc_deps(c_file, self._clade.get_cmd_deps(desc['CC']))
                c_files.add(c_file)

        return c_files, collection
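

# A minimal, self-contained sketch of the dependency map built by
# Source._collect_file_dependencies() above, using hand-written CC
# descriptions instead of real Clade data (all file names are hypothetical).
def _sketch_collect_file_dependencies(cc_descriptions):
    collection = dict()
    c_files = set()
    for c_file, deps in cc_descriptions:
        # Remember which C file each dependency is included into.
        for file in deps:
            collection.setdefault(file, set()).add(c_file)
        c_files.add(c_file)
    return c_files, collection


if __name__ == '__main__':
    cfiles, files_map = _sketch_collect_file_dependencies([
        ('drivers/a.c', ['include/shared.h', 'drivers/a.h']),
        ('drivers/b.c', ['include/shared.h']),
    ])
    # 'include/shared.h' is included into both C files.
    assert files_map['include/shared.h'] == {'drivers/a.c', 'drivers/b.c'}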
Example #2
class Job(klever.core.components.Component):
    CORE_COMPONENTS = ['PFG', 'VTG', 'VRP']

    def __init__(self,
                 conf,
                 logger,
                 parent_id,
                 callbacks,
                 mqs,
                 vals,
                 id=None,
                 work_dir=None,
                 attrs=None,
                 separate_from_parent=True,
                 include_child_resources=False,
                 components_common_conf=None):
        super(Job,
              self).__init__(conf, logger, parent_id, callbacks, mqs, vals, id,
                             work_dir, attrs, separate_from_parent,
                             include_child_resources)
        self.common_components_conf = components_common_conf

        if work_dir:
            self.common_components_conf[
                'additional sources directory'] = os.path.join(
                    os.path.realpath(work_dir), 'additional sources')

        self.clade = None
        self.components = []
        self.component_processes = []

    def decide_job_or_sub_job(self):
        self.logger.info('Decide job/sub-job "{0}"'.format(self.id))

        # This is required to associate verification results with particular sub-jobs.
        # Skip leading "/" since this identifier is used in os.path.join() that returns absolute path otherwise.
        self.common_components_conf['sub-job identifier'] = self.id[1:]

        # Check and set build base here since many Core components need it.
        self.__set_build_base()
        self.clade = Clade(self.common_components_conf['build base'])
        if not self.clade.work_dir_ok():
            raise RuntimeError('Build base is not OK')

        self.__retrieve_working_src_trees()
        self.__get_original_sources_basic_info()
        self.__upload_original_sources()

        # Create directory where files will be cached and remember absolute path to it for components.
        os.mkdir('cache')
        self.common_components_conf['cache directory'] = os.path.realpath(
            'cache')

        if self.common_components_conf['keep intermediate files']:
            self.logger.debug(
                'Create components configuration file "conf.json"')
            with open('conf.json', 'w', encoding='utf8') as fp:
                json.dump(self.common_components_conf,
                          fp,
                          ensure_ascii=False,
                          sort_keys=True,
                          indent=4)

        self.__get_job_or_sub_job_components()
        self.callbacks = klever.core.components.get_component_callbacks(
            self.logger, [type(self)] + self.components)
        self.launch_sub_job_components()

        self.clean_dir = True
        self.logger.info("All components finished")
        if self.conf.get('collect total code coverage', None):
            self.logger.debug('Waiting for coverage collection')
            while not self.vals['coverage_finished'].get(
                    self.common_components_conf['sub-job identifier'], True):
                time.sleep(1)
            self.logger.debug("Coverage collected")

    main = decide_job_or_sub_job

    def __set_build_base(self):
        if 'build base' not in self.common_components_conf:
            raise KeyError(
                "Provide 'build base' configuration option to start verification"
            )

        common_advice = 'please fix "job.json" (attribute "build base")'
        common_advice += ' and/or deployment configuration file (attribute "Klever Build Bases")'

        # Try to find specified build base either in normal way or additionally in directory "build bases" that is
        # convenient to use when working with many build bases.
        try:
            build_base = klever.core.utils.find_file_or_dir(
                self.logger, os.path.curdir,
                self.common_components_conf['build base'])
        except FileNotFoundError:
            try:
                build_base = klever.core.utils.find_file_or_dir(
                    self.logger, os.path.curdir,
                    os.path.join('build bases',
                                 self.common_components_conf['build base']))
            except FileNotFoundError:
                raise FileNotFoundError(
                    'Specified build base "{0}" does not exist, {1}'.format(
                        self.common_components_conf['build base'],
                        common_advice)) from None

        # Extract build base from archive. There should not be any intermediate directories in archives.
        if os.path.isfile(build_base) and (tarfile.is_tarfile(build_base)
                                           or zipfile.is_zipfile(build_base)):
            if tarfile.is_tarfile(build_base):
                self.logger.debug(
                    'Build base "{0}" is provided in form of TAR archive'.
                    format(build_base))
                with tarfile.open(build_base) as tar:
                    tar.extractall('build base')
            else:
                self.logger.debug(
                    'Build base "{0}" is provided in form of ZIP archive'.
                    format(build_base))
                with zipfile.ZipFile(build_base) as zfp:
                    zfp.extractall('build base')

            # Directory contains extracted build base.
            extracted_from = ' extracted from "{0}"'.format(
                os.path.realpath(build_base))
            build_base = 'build base'
        else:
            extracted_from = ''

        # We need to specify absolute path to build base since it will be used in different Klever components. Besides,
        # this simplifies troubleshooting.
        build_base = os.path.realpath(build_base)

        # TODO: fix after https://github.com/17451k/clade/issues/108.
        if not os.path.isdir(build_base):
            raise FileExistsError(
                'Build base "{0}"{1} is not a directory, {2}'.format(
                    build_base, extracted_from, common_advice))

        if not os.path.isfile(os.path.join(build_base, 'meta.json')):
            raise FileExistsError(
                'Directory "{0}"{1} is not a build base since it does not contain file "meta.json", {2}'
                .format(build_base, extracted_from, common_advice))

        self.common_components_conf['build base'] = build_base

        self.logger.debug('Klever components will use build base "{0}"'.format(
            self.common_components_conf['build base']))
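
    # Resolution sketch for __set_build_base(): 1) look up "build base" as
    # given; 2) retry under the "build bases" directory; 3) if the result is a
    # TAR/ZIP archive, extract it into "./build base"; 4) require that the
    # final absolute path is a directory containing "meta.json".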

    # Klever will try to cut off either working source trees (if specified) or at least build directory (otherwise)
    # from referred file names. Sometimes this is rather optional, like for source files referred by error traces, but,
    # say, for program fragment identifiers this is strictly necessary, e.g. because otherwise expert assessment will
    # not work as expected.
    def __retrieve_working_src_trees(self):
        clade_meta = self.clade.get_meta()
        self.common_components_conf['working source trees'] = clade_meta['working source trees'] \
            if 'working source trees' in clade_meta else [clade_meta['build_dir']]

    def __refer_original_sources(self, src_id):
        klever.core.utils.report(self.logger, 'patch', {
            'identifier': self.id,
            'original_sources': src_id
        }, self.mqs['report files'], self.vals['report id'],
                                 self.conf['main working directory'])

    def __process_source_files(self):
        for file_name in self.clade.src_info:
            self.mqs['file names'].put(file_name)

        for i in range(self.workers_num):
            self.mqs['file names'].put(None)

    def __process_source_file(self):
        while True:
            file_name = self.mqs['file names'].get()

            if not file_name:
                return

            src_file_name = klever.core.utils.make_relative_path(
                self.common_components_conf['working source trees'], file_name)

            if src_file_name != file_name:
                src_file_name = os.path.join('source files', src_file_name)

            new_file_name = os.path.join('original sources',
                                         src_file_name.lstrip(os.path.sep))
            os.makedirs(os.path.dirname(new_file_name), exist_ok=True)
            shutil.copy(self.clade.get_storage_path(file_name), new_file_name)

            cross_refs = CrossRefs(
                self.common_components_conf, self.logger, self.clade,
                file_name, new_file_name,
                self.common_components_conf['working source trees'],
                'source files')
            cross_refs.get_cross_refs()

    def __get_original_sources_basic_info(self):
        self.logger.info(
            'Get information on original sources for subsequent visualization of uncovered source files'
        )

        # For each source file we need to know the total number of lines and places where functions are defined.
        src_files_info = dict()
        for file_name, file_size in self.clade.src_info.items():
            src_file_name = klever.core.utils.make_relative_path(
                self.common_components_conf['working source trees'], file_name)

            # Skip non-source files.
            if src_file_name == file_name:
                continue

            src_file_name = os.path.join('source files', src_file_name)

            src_files_info[src_file_name] = list()

            # Store source file size.
            src_files_info[src_file_name].append(file_size['loc'])

            # Store source file function definition lines.
            func_def_lines = list()
            funcs = self.clade.get_functions_by_file([file_name], False)

            if funcs:
                for func_name, func_info in list(funcs.values())[0].items():
                    func_def_lines.append(int(func_info['line']))

            src_files_info[src_file_name].append(sorted(func_def_lines))

        # Dump the obtained information (huge data!) to load it when reporting total code coverage if everything goes okay.
        with open('original sources basic information.json', 'w') as fp:
            klever.core.utils.json_dump(src_files_info, fp,
                                        self.conf['keep intermediate files'])
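
    # Resulting structure sketch (a hypothetical entry): the value is the total
    # number of lines followed by the sorted function definition lines, e.g.
    #   {'source files/x.c': [120, [10, 57, 93]]}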

    def __upload_original_sources(self):
        # Use the Clade UUID to distinguish various original sources. It works pretty well since this UUID is uuid.uuid4().
        src_id = self.clade.get_uuid()

        session = klever.core.session.Session(self.logger,
                                              self.conf['Klever Bridge'],
                                              self.conf['identifier'])

        if session.check_original_sources(src_id):
            self.logger.info('Original sources were uploaded already')
            self.__refer_original_sources(src_id)
            return

        self.logger.info(
            'Cut off working source trees or build directory from original source file names and convert index data'
        )
        os.makedirs('original sources')
        self.mqs['file names'] = multiprocessing.Queue()
        self.workers_num = klever.core.utils.get_parallel_threads_num(
            self.logger, self.conf)
        subcomponents = [('PSFS', self.__process_source_files)]
        for i in range(self.workers_num):
            subcomponents.append(('RSF', self.__process_source_file))
        self.launch_subcomponents(False, *subcomponents)
        self.mqs['file names'].close()

        self.logger.info('Compress original sources')
        klever.core.utils.ArchiveFiles(['original sources'
                                        ]).make_archive('original sources.zip')

        self.logger.info('Upload original sources')
        try:
            session.upload_original_sources(src_id, 'original sources.zip')
        # Do not fail if there are already original sources. There may be complex data races because checking and
        # uploading the original sources archive are not atomic.
        except klever.core.session.BridgeError:
            if "original sources with this identifier already exists." not in list(
                    session.error.values())[0]:
                raise

        self.__refer_original_sources(src_id)

        if not self.conf['keep intermediate files']:
            shutil.rmtree('original sources')
            os.remove('original sources.zip')

    def __get_job_or_sub_job_components(self):
        self.logger.info('Get components for sub-job "{0}"'.format(self.id))

        self.components = [
            getattr(
                importlib.import_module('.{0}'.format(component.lower()),
                                        'klever.core'), component)
            for component in self.CORE_COMPONENTS
        ]

        self.logger.debug('Components to be launched: "{0}"'.format(', '.join(
            [component.__name__ for component in self.components])))

    def launch_sub_job_components(self):
        """Has callbacks"""
        self.logger.info('Launch components for sub-job "{0}"'.format(self.id))

        for component in self.components:
            p = component(self.common_components_conf,
                          self.logger,
                          self.id,
                          self.callbacks,
                          self.mqs,
                          self.vals,
                          separate_from_parent=True)
            self.component_processes.append(p)

        klever.core.components.launch_workers(self.logger,
                                              self.component_processes)
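

# A stripped-down, standalone sketch of the producer/worker pattern used by
# __process_source_files()/__process_source_file() above: the producer puts
# one None sentinel per worker into the queue so that every worker receives
# exactly one stop signal. File names and the worker count are hypothetical.
import multiprocessing


def _sketch_worker(queue):
    while True:
        file_name = queue.get()
        if not file_name:
            return  # Sentinel received: this worker is done.
        print('processing', file_name)


if __name__ == '__main__':
    workers_num = 2
    queue = multiprocessing.Queue()
    for name in ('a.c', 'b.c', 'c.c'):
        queue.put(name)
    for _ in range(workers_num):
        queue.put(None)  # One sentinel per worker.
    processes = [multiprocessing.Process(target=_sketch_worker, args=(queue,))
                 for _ in range(workers_num)]
    for p in processes:
        p.start()
    for p in processes:
        p.join()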
Example #3
class Job(klever.core.components.Component):
    CORE_COMPONENTS = [
        'PFG',
        'VTG',
        'VRP'
    ]

    def __init__(self, conf, logger, parent_id, callbacks, mqs, vals, id=None, work_dir=None, attrs=None,
                 separate_from_parent=True, include_child_resources=False, components_common_conf=None):
        super(Job, self).__init__(conf, logger, parent_id, callbacks, mqs, vals, id, work_dir, attrs,
                                  separate_from_parent, include_child_resources)
        self.common_components_conf = components_common_conf

        if work_dir:
            self.common_components_conf['additional sources directory'] = os.path.join(os.path.realpath(work_dir),
                                                                                       'additional sources')

        self.clade = None
        self.components = []
        self.component_processes = []

    def decide_job_or_sub_job(self):
        self.logger.info('Decide job/sub-job "{0}"'.format(self.id))

        # This is required to associate verification results with particular sub-jobs.
        # Skip leading "/" since this identifier is used in os.path.join() that returns absolute path otherwise.
        self.common_components_conf['sub-job identifier'] = self.id[1:]

        self.logger.info('Get specifications set')
        if 'specifications set' in self.common_components_conf:
            spec_set = self.common_components_conf['specifications set']
        else:
            raise KeyError('Specify attribute "specifications set" within job.json')
        self.logger.debug('Specifications set is "{0}"'.format(spec_set))

        # Check that specifications set is supported.
        with open(self.common_components_conf['specifications base'], encoding='utf-8') as fp:
            req_spec_base = json.load(fp)
        spec_set = self.common_components_conf['specifications set']
        if spec_set not in req_spec_base['specification sets']:
            raise ValueError("Klever does not support specifications set {!r} yet, available options are: {}"
                             .format(spec_set, ', '.join(req_spec_base['specification sets'])))

        # Check and set build base here since many Core components need it.
        self.__set_build_base()
        self.clade = Clade(self.common_components_conf['build base'])
        if not self.clade.work_dir_ok():
            raise RuntimeError(f'Build base "{self.common_components_conf["build base"]}" is not OK')

        self.__retrieve_working_src_trees()
        self.__get_original_sources_basic_info()
        self.__upload_original_sources()

        # Create directory where files will be cached and remember absolute path to it for components.
        os.mkdir('cache')
        self.common_components_conf['cache directory'] = os.path.realpath('cache')

        if self.common_components_conf['keep intermediate files']:
            self.logger.debug('Create components configuration file "conf.json"')
            with open('conf.json', 'w', encoding='utf-8') as fp:
                json.dump(self.common_components_conf, fp, ensure_ascii=False, sort_keys=True, indent=4)

        self.__get_job_or_sub_job_components()
        self.callbacks = klever.core.components.get_component_callbacks(self.logger, [type(self)] + self.components)
        self.launch_sub_job_components()

        self.clean_dir = True
        self.logger.info("All components finished")
        if self.conf.get('collect total code coverage', None):
            self.logger.debug('Waiting for coverage collection')
            while not self.vals['coverage_finished'].get(self.common_components_conf['sub-job identifier'], True):
                time.sleep(1)
            self.logger.debug("Coverage collected")

    main = decide_job_or_sub_job

    def __set_build_base(self):
        if 'build base' not in self.common_components_conf:
            raise KeyError("Provide 'build base' configuration option to start verification")

        common_advice = 'please fix "job.json" (attribute "build base")'
        common_advice += ' and/or deployment configuration file (attribute "Klever Build Bases")'

        # Try to find specified build base either in normal way or additionally in directory "build bases" that is
        # convenient to use when working with many build bases.
        try:
            build_base = klever.core.utils.find_file_or_dir(self.logger,
                                                            self.common_components_conf['main working directory'],
                                                            self.common_components_conf['build base'])
        except FileNotFoundError:
            self.logger.warning('Failed to find build base:\n{}'.format(traceback.format_exc().rstrip()))
            try:
                build_base = klever.core.utils.find_file_or_dir(
                    self.logger, self.common_components_conf['main working directory'],
                    os.path.join('build bases', self.common_components_conf['build base']))
            except FileNotFoundError:
                self.logger.warning('Failed to find build base:\n{}'.format(traceback.format_exc().rstrip()))
                raise FileNotFoundError(
                    'Specified build base "{0}" does not exist, {1}'.format(self.common_components_conf['build base'],
                                                                            common_advice)) from None

        # Extract build base from archive. There should not be any intermediate directories in archives.
        if os.path.isfile(build_base) and (tarfile.is_tarfile(build_base) or zipfile.is_zipfile(build_base)):
            if tarfile.is_tarfile(build_base):
                self.logger.debug('Build base "{0}" is provided in the form of a TAR archive'.format(build_base))
                with tarfile.open(build_base) as tar:
                    tar.extractall('build base')
            else:
                self.logger.debug('Build base "{0}" is provided in the form of a ZIP archive'.format(build_base))
                with zipfile.ZipFile(build_base) as zfp:
                    zfp.extractall('build base')

            # Directory contains extracted build base.
            extracted_from = ' extracted from "{0}"'.format(os.path.realpath(build_base))
            build_base = 'build base'
        else:
            extracted_from = ''

        # We need to specify absolute path to build base since it will be used in different Klever components. Besides,
        # this simplifies troubleshooting.
        build_base = os.path.realpath(build_base)

        # TODO: fix after https://github.com/17451k/clade/issues/108.
        if not os.path.isdir(build_base):
            raise FileExistsError('Build base "{0}"{1} is not a directory, {2}'
                                  .format(build_base, extracted_from, common_advice))

        if not os.path.isfile(os.path.join(build_base, 'meta.json')):
            raise FileExistsError(
                'Directory "{0}"{1} is not a build base since it does not contain file "meta.json", {2}'
                .format(build_base, extracted_from, common_advice))

        self.common_components_conf['build base'] = build_base

        self.logger.debug('Klever components will use build base "{0}"'
                          .format(self.common_components_conf['build base']))

    # Klever will try to cut off either working source trees (if specified) or maximum common paths of CC/CL input files
    # and LD/Link output files (otherwise) from referred file names. Sometimes this is rather optional, like for source
    # files referred by error traces, but, say, for program fragment identifiers this is strictly necessary, e.g.
    # because otherwise expert assessment will not work as expected.
    def __retrieve_working_src_trees(self):
        clade_meta = self.clade.get_meta()

        # Best of all if users specify working source trees in build bases manually themselves. It is the most
        # accurate approach.
        if 'working source trees' in clade_meta:
            work_src_trees = clade_meta['working source trees']
        # Otherwise try to find them out automatically as described above.
        else:
            in_files = []
            for cmd in self.clade.get_all_cmds_by_type("CC") + self.clade.get_all_cmds_by_type("CL"):
                if cmd['in']:
                    for in_file in cmd['in']:
                        # Sometimes some auxiliary stuff is built in addition to normal C source files, which are most
                        # likely located in the place we would like to find.
                        if not in_file.startswith('/tmp') and in_file != '/dev/null':
                            in_files.append(os.path.join(cmd['cwd'], in_file))
            in_files_prefix = os.path.dirname(os.path.commonprefix(in_files))
            self.logger.info('Common prefix of CC/CL input files is "{0}"'.format(in_files_prefix))

            out_files = []
            for cmd in self.clade.get_all_cmds_by_type("LD") + self.clade.get_all_cmds_by_type("Link"):
                if cmd['out']:
                    for out_file in cmd['out']:
                        # Like above.
                        if not out_file.startswith('/tmp') and out_file != '/dev/null':
                            out_files.append(os.path.join(cmd['cwd'], out_file))
            out_files_prefix = os.path.dirname(os.path.commonprefix(out_files))
            self.logger.info('Common prefix of LD/Link output files is "{0}"'.format(out_files_prefix))

            # Meaningful paths look like "/dir...".
            meaningful_paths = []
            for path in (in_files_prefix, out_files_prefix):
                if path and path != os.path.sep and path not in meaningful_paths:
                    meaningful_paths.append(path)

            if meaningful_paths:
                work_src_trees = meaningful_paths
            # At least consider build directory as working source tree if the automatic procedure fails.
            else:
                self.logger.warning(
                    'Consider build directory "{0}" as working source tree. '
                    'This may be dangerous and we recommend specifying appropriate working source trees manually!'
                    .format(clade_meta['build_dir']))
                work_src_trees = [clade_meta['build_dir']]

        # Consider minimal path if it is common prefix for other ones. For instance, if we have "/dir1/dir2" and "/dir1"
        # then "/dir1" will become the only working source tree.
        if len(work_src_trees) > 1:
            min_work_src_tree = min(work_src_trees)
            if os.path.commonprefix(work_src_trees) == min_work_src_tree:
                work_src_trees = [min_work_src_tree]

        self.logger.info(
            'Working source trees to be used are as follows:\n{0}'
            .format('\n'.join(['  {0}'.format(t) for t in work_src_trees])))
        self.common_components_conf['working source trees'] = work_src_trees
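
    # Minimization sketch (illustrative paths): for work_src_trees equal to
    # ['/dir1/dir2', '/dir1'], min() yields '/dir1', which is also the common
    # prefix of both paths, so only '/dir1' is kept as the working source tree.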

    def __refer_original_sources(self, src_id):
        klever.core.utils.report(
            self.logger,
            'patch',
            {
                'identifier': self.id,
                'original_sources': src_id
            },
            self.mqs['report files'],
            self.vals['report id'],
            self.conf['main working directory']
        )

    def __process_source_files(self):
        for file_name in self.clade.src_info:
            self.mqs['file names'].put(file_name)

        for i in range(self.workers_num):
            self.mqs['file names'].put(None)

    def __process_source_file(self):
        while True:
            file_name = self.mqs['file names'].get()

            if not file_name:
                return

            src_file_name = klever.core.utils.make_relative_path(self.common_components_conf['working source trees'],
                                                                 file_name)

            if src_file_name != file_name:
                src_file_name = os.path.join('source files', src_file_name)

            new_file_name = os.path.join('original sources', src_file_name.lstrip(os.path.sep))
            os.makedirs(os.path.dirname(new_file_name), exist_ok=True)
            shutil.copy(self.clade.get_storage_path(file_name), new_file_name)

            cross_refs = CrossRefs(self.common_components_conf, self.logger, self.clade,
                                   file_name, new_file_name,
                                   self.common_components_conf['working source trees'], 'source files')
            cross_refs.get_cross_refs()

    def __get_original_sources_basic_info(self):
        self.logger.info('Get information on original sources for subsequent visualization of uncovered source files')

        # For each source file we need to know the total number of lines and places where functions are defined.
        src_files_info = dict()
        for file_name, file_size in self.clade.src_info.items():
            src_file_name = klever.core.utils.make_relative_path(self.common_components_conf['working source trees'],
                                                                 file_name)

            # Skip non-source files.
            if src_file_name == file_name:
                continue

            src_file_name = os.path.join('source files', src_file_name)

            src_files_info[src_file_name] = list()

            # Store source file size.
            src_files_info[src_file_name].append(file_size['loc'])

            # Store source file function definition lines.
            func_def_lines = list()
            funcs = self.clade.get_functions_by_file([file_name], False)

            if funcs:
                for func_name, func_info in list(funcs.values())[0].items():
                    func_def_lines.append(int(func_info['line']))

            src_files_info[src_file_name].append(sorted(func_def_lines))

        # Dump the obtained information (huge data!) to load it when reporting total code coverage if everything goes okay.
        with open('original sources basic information.json', 'w') as fp:
            klever.core.utils.json_dump(src_files_info, fp, self.conf['keep intermediate files'])

    def __upload_original_sources(self):
        # Use the Clade UUID to distinguish various original sources. It works pretty well since this UUID is uuid.uuid4().
        src_id = self.clade.get_uuid()
        # In addition, take into account the meta content since we often change it manually. In that case it may be
        # necessary to re-index the build base. It is not clear whether this is actually required, so do it whenever
        # the meta changes.
        src_id += '-' + klever.core.utils.get_file_name_checksum(json.dumps(self.clade.get_meta()))[:12]

        session = klever.core.session.Session(self.logger, self.conf['Klever Bridge'], self.conf['identifier'])

        if session.check_original_sources(src_id):
            self.logger.info('Original sources were uploaded already')
            self.__refer_original_sources(src_id)
            return

        self.logger.info(
            'Cut off working source trees or build directory from original source file names and convert index data')
        os.makedirs('original sources')
        self.mqs['file names'] = multiprocessing.Queue()
        self.workers_num = klever.core.utils.get_parallel_threads_num(self.logger, self.conf)
        subcomponents = [('PSFS', self.__process_source_files)]
        for i in range(self.workers_num):
            subcomponents.append(('PSF', self.__process_source_file))
        self.launch_subcomponents(False, *subcomponents)
        self.mqs['file names'].close()

        self.logger.info('Compress original sources')
        klever.core.utils.ArchiveFiles(['original sources']).make_archive('original sources.zip')

        self.logger.info('Upload original sources')
        try:
            session.upload_original_sources(src_id, 'original sources.zip')
        # Do not fail if there are already original sources. There may be complex data races because checking and
        # uploading the original sources archive are not atomic.
        except klever.core.session.BridgeError:
            if "original sources with this identifier already exists." not in list(session.error.values())[0]:
                raise

        self.__refer_original_sources(src_id)

        if not self.conf['keep intermediate files']:
            shutil.rmtree('original sources')
            os.remove('original sources.zip')

    def __get_job_or_sub_job_components(self):
        self.logger.info('Get components for sub-job "{0}"'.format(self.id))

        self.components = [getattr(importlib.import_module('.{0}'.format(component.lower()), 'klever.core'), component)
                           for component in self.CORE_COMPONENTS]

        self.logger.debug('Components to be launched: "{0}"'.format(
            ', '.join([component.__name__ for component in self.components])))

    def launch_sub_job_components(self):
        """Has callbacks"""
        self.logger.info('Launch components for sub-job "{0}"'.format(self.id))

        for component in self.components:
            p = component(self.common_components_conf, self.logger, self.id, self.callbacks, self.mqs,
                          self.vals, separate_from_parent=True)
            self.component_processes.append(p)

        klever.core.components.launch_workers(self.logger, self.component_processes)
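

# A hedged sketch of the original sources identifier composed in
# __upload_original_sources() above: a Clade UUID plus the first 12 characters
# of a checksum of the serialized meta. Here hashlib/json/uuid stand in for
# clade.get_uuid() and klever.core.utils.get_file_name_checksum(), which are
# assumed to behave similarly.
import hashlib
import json
import uuid


def _sketch_src_id(meta):
    meta_checksum = hashlib.sha256(
        json.dumps(meta, sort_keys=True).encode('utf-8')).hexdigest()
    return str(uuid.uuid4()) + '-' + meta_checksum[:12]


if __name__ == '__main__':
    # A toy meta dictionary; real Clade meta contains much more.
    print(_sketch_src_id({'build_dir': '/hypothetical/build/dir'}))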