Example #1
 def __run_p4a_process(self):
     self.files_to_compile += [
         file_dict['file_full_path'] for file_dict in self.get_file_list()
     ]
     command = 'PATH=/bin:$PATH p4a -vv ' + ' '.join(self.files_to_compile)
     if self.extra_files:
         command += f' {" ".join(self.extra_files)}'
     command += ' ' + ' '.join(map(str, super().get_compilation_flags()))
     if self.include_dirs_list:
         command += ' -I ' + ' -I '.join(
             map(
                 lambda x: os.path.join(self.get_input_file_directory(),
                                        str(x)), self.include_dirs_list))
     try:
         logger.info(f'{Par4all.__name__}: start parallelizing')
         stdout, stderr, ret_code = run_subprocess([
             command,
         ], self.get_input_file_directory())
         log_file_path = os.path.join(self.get_input_file_directory(),
                                      Par4allConfig.LOG_FILE_NAME)
         logger.log_to_file(f'{stdout}\n{stderr}', log_file_path)
         logger.debug(f'{Par4all.__name__}: {stdout}')
         logger.debug_error(f'{Par4all.__name__}: {stderr}')
         logger.info(f'{Par4all.__name__}: finished parallelizing')
     except subprocess.CalledProcessError as e:
         log_file_path = os.path.join(self.get_input_file_directory(),
                                      Par4allConfig.LOG_FILE_NAME)
         logger.log_to_file(f'{e.output}\n{e.stderr}', log_file_path)
         raise CombinationFailure(
             f'par4all returned with code {e.returncode}: {str(e)} : {e.output} : {e.stderr}'
         )
     except Exception as e:
         raise CompilationError(
             f"{e}\nfiles in directory {self.get_input_file_directory()} failed to be parallel!"
         )
Example #2
    def compile(self):
        super().compile()
        try:
            for file in self.get_file_list():
                Cetus.replace_line_in_code(file["file_full_path"], GlobalsConfig.OMP_HEADER, '')
                cwd_path = os.path.dirname(file["file_full_path"])
                self.copy_headers(cwd_path)
                logger.info(f'{Cetus.__name__}: start parallelizing {file["file_name"]}')
                command = [f'cetus {" ".join(self.get_compilation_flags())} {file["file_name"]}']
                stdout, stderr, ret_code = run_subprocess(command, cwd_path)
                log_file_path = f'{os.path.splitext(file["file_full_path"])[0]}{CetusConfig.LOG_FILE_SUFFIX}'
                logger.log_to_file(f'{stdout}\n{stderr}', log_file_path)
                logger.debug(f'{Cetus.__name__}: {stdout}')
                logger.debug_error(f'{Cetus.__name__}: {stderr}')
                logger.info(f'{Cetus.__name__}: finished parallelizing {file["file_name"]}')
                # Move the parallelized file from the Cetus output folder back over the original
                if os.path.isdir(os.path.join(cwd_path, CetusConfig.OUTPUT_DIR_NAME)):
                    src_file = os.path.join(cwd_path, CetusConfig.OUTPUT_DIR_NAME, file["file_name"])
                    dst_file = file["file_full_path"]
                    shutil.copy(src_file, dst_file)
                    shutil.rmtree(os.path.join(cwd_path, CetusConfig.OUTPUT_DIR_NAME))

                Cetus.inject_line_in_code(file["file_full_path"], GlobalsConfig.OMP_HEADER)
            return True
        except subprocess.CalledProcessError as ex:
            log_file_path = f'{os.path.splitext(file["file_full_path"])[0]}{CetusConfig.LOG_FILE_SUFFIX}'
            logger.log_to_file(f'{ex.output}\n{ex.stderr}', log_file_path)
            raise CombinationFailure(f'cetus returned with code {ex.returncode}: {str(ex)} : {ex.output} : {ex.stderr}')
        except Exception as ex:
            raise CompilationError(f'{ex} files in directory {self.get_input_file_directory()} '
                                   f'failed to be parallelized!')
Example #3
 def run_parallel_combinations(self):
     logger.info('Starting to work on parallel combinations')
     self.parallel_jobs_pool_executor.create_jobs_pool()
     # if it equals one, we neither append the repetition index to the combination id nor calculate an average
     is_multiple_combinations = self.multiple_combinations > 1
     for combination_json in self.db.combinations_iterator():
         original_combination_obj = Combination.json_to_obj(combination_json)
         logger.info(LogPhrases.NEW_COMBINATION.format(original_combination_obj.combination_id))
         for i in range(self.multiple_combinations):
             if is_multiple_combinations:
                 combination_obj = copy.deepcopy(original_combination_obj)
                 combination_obj.combination_id = f'{combination_obj.combination_id}_{i}'
                 logger.info(f'#{i} repetition of {original_combination_obj.combination_id} combination')
             else:
                 combination_obj = original_combination_obj
             combination_folder_path = self.create_combination_folder(str(combination_obj.get_combination_id()))
             try:
                 self.parallel_compilation_of_one_combination(combination_obj, combination_folder_path)
                 self.compile_combination_to_binary(combination_folder_path)
             except Exception as ex:
                 logger.info_error(f'Exception at {Compar.__name__}: {ex}')
                 logger.debug_error(f'{traceback.format_exc()}')
                 self.save_combination_as_failure(combination_obj.get_combination_id(), str(ex),
                                                  combination_folder_path)
                 continue
             job = Job(combination_folder_path, combination_obj, self.main_file_parameters)
             self.parallel_jobs_pool_executor.run_job_in_thread(self.run_and_save_job, job)
     self.parallel_jobs_pool_executor.wait_and_finish_pool()
     if is_multiple_combinations:
         self.calculate_multiple_combinations_average()
     logger.info('Finished working on all the parallel combinations')
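A note on the copy.deepcopy call above: each repetition must get its own combination object, so appending the repetition index to combination_id never mutates the original that later iterations start from. A standalone sketch (this Combination class is a made-up stand-in, not the project's):

    import copy

    class Combination:  # hypothetical stand-in for illustration only
        def __init__(self, combination_id):
            self.combination_id = combination_id

    original = Combination('42')
    for i in range(3):
        repeated = copy.deepcopy(original)  # independent copy per repetition
        repeated.combination_id = f'{repeated.combination_id}_{i}'
        print(repeated.combination_id)      # 42_0, 42_1, 42_2
    print(original.combination_id)          # still '42': untouched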
Example #4
 def delete_combination(self, combination_id: str):
     try:
         self.dynamic_db[self.collection_name].delete_one({"_id": combination_id})
         return True
     except Exception as e:
         logger.info_error(f'Exception at {Database.__name__}: Could not delete combination: {e}')
         logger.debug_error(f'{traceback.format_exc()}')
         return False
Example #5
 def run_makefile(self):
     logger.info(f'{Makefile.__name__}: started running makefile')
     command = ' && '.join(self.commands)
     stdout, stderr, ret_code = run_subprocess(command,
                                               self.working_directory)
     logger.debug(f'{Makefile.__name__}: {stdout}')
     logger.debug_error(f'{Makefile.__name__}: {stderr}')
     logger.info(f'{Makefile.__name__}: finished running makefile')
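Every example on this page calls a shared run_subprocess helper whose definition is not shown. Judging from the call sites, it accepts a shell command (a plain string, or a list that is joined into one), an optional working directory, returns an (stdout, stderr, return_code) tuple, and raises subprocess.CalledProcessError on a non-zero exit. A minimal sketch under those assumptions, not the project's actual implementation:

    import subprocess

    def run_subprocess(command, cwd=None):
        # Hedged sketch only; the real helper is not part of these examples.
        if isinstance(command, list):
            command = ' '.join(str(part) for part in command)
        # shell=True so env prefixes like 'PATH=/bin:$PATH p4a ...' and '&&' chains work;
        # check=True raises CalledProcessError with .output/.stderr set, as the callers expect
        result = subprocess.run(command, shell=True, cwd=cwd,
                                capture_output=True, text=True, check=True)
        return result.stdout, result.stderr, result.returncode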
Example #6
 def insert_new_combination_results(self, combination_result: dict):
     try:
         self.dynamic_db[self.collection_name].insert_one(combination_result)
         return True
     except Exception as e:
         logger.info_error(f'{Database.__name__}: cannot insert into dynamic DB: {e}')
         logger.debug_error(f'{traceback.format_exc()}')
         return False
Example #7
 def get_combination_results(self, combination_id: str):
     combination = None
     try:
         combination = self.dynamic_db[self.collection_name].find_one({"_id": combination_id})
     except Exception as e:
         logger.info_error(f'Exception at {Database.__name__}: Could not find results for combination: {e}')
         logger.debug_error(f'{traceback.format_exc()}')
     return combination
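The return was moved out of the finally block because a return there silently discards any in-flight exception, even ones the except Exception clause above never catches. A standalone illustration:

    def swallow():
        try:
            raise KeyboardInterrupt  # not an Exception subclass; nothing here catches it
        finally:
            return 'fine'            # discards the in-flight KeyboardInterrupt

    print(swallow())  # prints 'fine'; the interrupt is silently lost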
Example #8
 def run_and_save_job(self, job_obj: Job):
     try:
         job_obj = self.execute_job(job_obj, self.serial_run_time)
     except Exception as ex:
         logger.info_error(f'Exception at {Compar.__name__}: {ex}')
         logger.debug_error(f'{traceback.format_exc()}')
     finally:
         if not self.save_combinations_folders:
             self.__delete_combination_folder(job_obj.get_directory_path())
Example #9
    def run_compiler(self):
        input_file_path_only = os.path.dirname(self.get_input_file_directory() + os.path.sep)
        dir_name = os.path.basename(input_file_path_only)

        logger.info(f'{BinaryCompiler.__name__}: started compiling {self.get_main_c_file()}')
        command = [self.get_compiler_name(), "-fopenmp"] + self.get_compilation_flags()
        command += [self.get_main_c_file(), "-o", dir_name + ".x"]
        stdout, stderr, ret_code = run_subprocess(command, self.get_input_file_directory())
        logger.debug(f'{BinaryCompiler.__name__}: {stdout}')
        logger.debug_error(f'{BinaryCompiler.__name__}: {stderr}')
        logger.info(f'{BinaryCompiler.__name__}: finished compiling {self.get_main_c_file()}')
Example #10
    def run_compiler(self):
        input_file_path_only = os.path.dirname(self.get_input_file_directory() + os.path.sep)
        dir_name = os.path.basename(input_file_path_only)

        logger.info(f'{Icc.__name__}: started compiling {self.get_main_c_file()}')
        command = [self.get_compiler_name(), "-fopenmp"] + self.get_compilation_flags()
        command += [self.get_main_c_file(), "-o", dir_name + ".x"]
        stdout, stderr, ret_code = run_subprocess(command, self.get_input_file_directory())
        logger.debug(f'{Icc.__name__}: {stdout}')
        logger.debug_error(f'{Icc.__name__}: {stderr}')
        logger.info(f'{Icc.__name__}: finished compiling {self.get_main_c_file()}')
Example #11
 def __remove_bswap_function(file_path: str):
     bswap_regex = re.compile(r'static __uint64_t __bswap_64[^\}]*\}',
                              flags=re.DOTALL)
     try:
         with open(file_path, 'r+') as f:
             content = f.read()
             if bswap_regex.search(content):  # search(), not match(): the definition can appear anywhere in the file
                 content = bswap_regex.sub('', content)
                 f.seek(0)
                 f.write(content)
                 f.truncate()
     except Exception as e:
         logger.info_error(f'Exception at {Par4all.__name__}: {e}')
         logger.debug_error(f'{traceback.format_exc()}')
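For reference on the search() fix above, a standalone illustration: re.match() only tests the start of the string, so a __bswap_64 definition in the middle of a file would never be found.

    import re

    pattern = re.compile(r'static __uint64_t __bswap_64')
    content = '#include <stdio.h>\nstatic __uint64_t __bswap_64 (__uint64_t x) {}'
    print(bool(pattern.match(content)))   # False: match() anchors at position 0
    print(bool(pattern.search(content)))  # True: search() scans the whole string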
Example #12
 def run_autopar(self, file_name: str, file_full_path: str, options: list):
     logger.info(f'{Autopar.__name__}: started parallelizing {file_name}')
     command = 'autoPar'
     if self.include_dirs_list:
         command += ' -I' + ' -I'.join(map(lambda x: os.path.join(self.get_input_file_directory(), str(x)),
                                           self.include_dirs_list))
     command += f' {" ".join(options)} -c {file_name}'
     stdout, stderr, ret_code = run_subprocess([command], os.path.dirname(file_full_path))
     log_file_path = f'{os.path.splitext(file_full_path)[0]}{AutoParConfig.LOG_FILE_SUFFIX}'
     logger.log_to_file(f'{stdout}\n{stderr}', log_file_path)
     dir_path, file_name = os.path.split(file_full_path)
     parallel_file_full_path = os.path.join(dir_path, f'{AutoParConfig.OUTPUT_FILE_NAME_PREFIX}{file_name}')
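     # autoPar writes its output as '<OUTPUT_FILE_NAME_PREFIX><file_name>' next to the source; swap it in over the original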
     if os.path.exists(parallel_file_full_path):
         os.remove(file_full_path)
         os.rename(parallel_file_full_path, file_full_path)
     logger.debug(f'{Autopar.__name__}: {stdout}')
     logger.debug_error(f'{Autopar.__name__}: {stderr}')
     logger.info(f'{Autopar.__name__}: finished parallelizing {file_name}')
Example #13
 def get_combination_from_static_db(self, combination_id: str):
     combination = None
     if combination_id == self.SERIAL_COMBINATION_ID:
         return {
             "_id": Database.SERIAL_COMBINATION_ID,
             "compiler_name": Database.SERIAL_COMBINATION_ID,
             "parameters": {
                 "omp_rtl_params": [],
                 "omp_directives_params": [],
                 "compilation_params": []
             }
         }
     try:
         combination = self.static_db[self.collection_name].find_one({"_id": combination_id})
     except Exception as e:
         logger.info_error(f'Exception at {Database.__name__}: Could not find combination: {e}')
         logger.debug_error(f'{traceback.format_exc()}')
     return combination
Example #14
 def __run_user_script(self, script_name: str):
     json_script_file_path = os.path.join(GlobalsConfig.ASSETS_DIR_PATH,
                                          script_name)
     if os.path.exists(json_script_file_path):
         with open(json_script_file_path, 'r') as f:
             json_content = json.load(f)
         if self.NAME in json_content:
             user_script_path = json_content[self.NAME]
             if os.path.exists(user_script_path):
                 try:
                     script_command = f'{user_script_path} {self.get_input_file_directory()}'
                     std_out, std_err, ret_code = run_subprocess(
                         script_command)
                     logger.debug(std_out)
                     logger.debug_error(std_err)
                 except subprocess.CalledProcessError as e:
                     logger.info_error(
                         f'{self.NAME}: user {script_name} script return with {e.returncode}: {e}'
                     )
                     logger.info(e.output)
                     logger.info_error(e.stderr)
Example #15
 def initialize_static_db(self):
     combinations = []  # pre-bind so the `del` in `finally` cannot mask errors from generate_combinations()
     try:
         combinations = generate_combinations()
         num_of_parallel_combinations = len(combinations)
         for combination in combinations:
             curr_combination_id = Database.generate_combination_id(combination)
             self.static_db[self.collection_name].update_one(
                 filter={
                     '_id': curr_combination_id
                 },
                 update={
                     '$setOnInsert': combination
                 },
                 upsert=True
             )
         return num_of_parallel_combinations
     except Exception as e:
         logger.info_error(f'Exception at {Database.__name__}: cannot initialize static DB: {e}')
         logger.debug_error(f'{traceback.format_exc()}')
         raise DatabaseError()
     finally:
         del combinations
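The $setOnInsert update above makes initialization idempotent: with upsert=True, the document body is written only when the _id is first inserted, so re-running the initializer never overwrites existing combinations. A standalone PyMongo illustration (the client, database, and document values are made up):

    from pymongo import MongoClient

    collection = MongoClient()['compar_demo']['static_combinations']  # hypothetical names
    doc = {'compiler_name': 'cetus', 'parameters': {'compilation_params': []}}
    for _ in range(2):  # the second pass is a no-op thanks to $setOnInsert
        collection.update_one(filter={'_id': 'combo_1'},
                              update={'$setOnInsert': doc},
                              upsert=True)
    print(collection.count_documents({'_id': 'combo_1'}))  # 1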
Example #16
        slurm_parameters=args.slurm_parameters,
        extra_files=args.extra_files,
        main_file_rel_path=args.main_file_rel_path,
        time_limit=args.time_limit,
        slurm_partition=args.slurm_partition,
        test_file_path=args.test_file_path,
        mode=args.mode,
        code_with_markers=args.code_with_markers,
        clear_db=args.clear_db,
        multiple_combinations=args.multiple_combinations,
        log_level=args.log_level)
    try:
        compar_obj.fragment_and_add_timers()
        compar_obj.run_serial()
        compar_obj.run_parallel_combinations()
        compar_obj.generate_optimal_code()
        logger.info('Finished Compar execution')
    except Exception:
        if args.clear_db:
            compar_obj.clear_related_collections()
        raise


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        logger.info_error(f'Exception at Compar Program: {e}')
        logger.debug_error(traceback.format_exc())
        exit(1)
Example #17
    def __run_with_sbatch(self, user_slurm_parameters: list):
        logger.info(
            f'Start running {self.get_job().get_combination().get_combination_id()} combination'
        )
        slurm_parameters = user_slurm_parameters
        dir_path = self.get_job().get_directory_path()
        dir_name = os.path.basename(dir_path)
        x_file = dir_name + MakefileConfig.EXE_FILE_EXTENSION
        sbatch_script_file = self.__make_sbatch_script_file(x_file)

        log_file = dir_name + GlobalsConfig.LOG_EXTENSION
        x_file_path = os.path.join(dir_path, x_file)
        log_file_path = os.path.join(dir_path, log_file)
        slurm_parameters = " ".join(slurm_parameters)
        cmd = f'sbatch {slurm_parameters} -o {log_file_path} {sbatch_script_file} {x_file_path}'
        if self.get_job().get_exec_file_args():
            cmd += f' {" ".join([str(arg) for arg in self.get_job().get_exec_file_args()])} '
        stdout = ""
        batch_job_sent = False
        while not batch_job_sent:
            try:
                stdout, stderr, ret_code = run_subprocess(cmd)
                batch_job_sent = True
            except subprocess.CalledProcessError as ex:
                logger.info_error(
                    f'Exception at {ExecuteJob.__name__}: {ex}\n{ex.output}\n{ex.stderr}'
                )
                logger.debug_error(f'{traceback.format_exc()}')
                logger.info_error(
                    'sbatch command not responding (slurm is down?)')
                time.sleep(
                    ExecuteJobConfig.TRY_SLURM_RECOVERY_AGAIN_SECOND_TIME)
        # extract the job id digits from the sbatch output (e.g. 'Submitted batch job 12345')
        result = ''.join(re.findall(r'\d', str(stdout)))
        self.get_job().set_job_id(result)
        logger.info(
            LogPhrases.JOB_SENT_TO_SLURM.format(self.get_job().get_job_id()))
        cmd = f"squeue -j {self.get_job().get_job_id()} --format %t"
        last_status = ''
        is_first_time = True
        is_finish = False
        while not is_finish:
            try:
                stdout, stderr = '', ''
                try:
                    stdout, stderr, ret_code = run_subprocess(cmd)
                except subprocess.CalledProcessError:  # squeue fails when slurm is down or once the job leaves the queue
                    _, _, ret_code = run_subprocess('squeue')
                    if ret_code != 0:
                        raise
                    else:
                        is_finish = True
                current_status = ''
                try:
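                    # squeue --format %t prints a header line ('ST') followed by the job's state code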
                    current_status = stdout.split('\n')[1]
                except IndexError:
                    if not is_finish:
                        logger.info_error(
                            f'Warning: check the squeue command output: {stdout} {stderr}'
                        )
                        time.sleep(ExecuteJobConfig.
                                   TRY_SLURM_RECOVERY_AGAIN_SECOND_TIME)
                        continue
                if current_status != last_status and current_status != '':
                    logger.info(
                        f'Job {self.get_job().get_job_id()} status is {current_status}'
                    )
                    last_status = current_status
                if not is_finish and not is_first_time:
                    # no sleep on the first poll: sometimes a fast job reaches COMPLETE immediately
                    time.sleep(ExecuteJobConfig.CHECK_SQUEUE_SECOND_TIME)
                if is_first_time:
                    is_first_time = False
            except subprocess.CalledProcessError as ex:  # squeue command not responding (slurm is down?)
                logger.info_error(
                    f'Exception at {ExecuteJob.__name__}: {ex}\n{ex.stdout}\n{ex.stderr}'
                )
                logger.debug_error(f'{traceback.format_exc()}')
                logger.info_error(
                    'squeue command not responding (slurm is down?)')
                time.sleep(
                    ExecuteJobConfig.TRY_SLURM_RECOVERY_AGAIN_SECOND_TIME)
        logger.info(
            LogPhrases.JOB_IS_COMPLETE.format(self.get_job().get_job_id()))