def rerun_job_if_exit_code_140(run_file):
    """Rerun jobs that exceeded their walltime, with twice the walltime.

    Looks for completed jobs of ``run_file`` whose output contains
    ``'Exited with exit code 140.'``. If there are none, returns immediately.
    Otherwise a rerun run file is created from the matching job files, the
    walltime and memory are read from the first matching job file, and the
    rerun file is submitted with the walltime multiplied by two.

    Args:
        run_file: Path of the run file whose jobs have just finished.

    Returns:
        None.
    """
    search_string = 'Exited with exit code 140.'
    files, job_files = find_completed_jobs_matching_search_string(
        run_file, search_string)

    if not files:
        return

    rerun_file = create_rerun_run_file(job_files)

    # Resources are taken from the first matching job file only.
    wall_time = extract_walltime_from_job_file(job_files[0])
    memory = extract_memory_from_job_file(job_files[0])
    new_wall_time = multiply_walltime(wall_time, factor=2)

    print(new_wall_time)
    print(memory)

    # Remove the error file that belongs to each matching output file.
    for out_file in files:
        # Swap only the LAST '.o' marker: a blanket str.replace would also
        # rewrite any '.o' occurring in a directory name or earlier in the
        # file name and point at the wrong path.
        stem, marker, suffix = out_file.rpartition('.o')
        if not marker:
            # No '.o' marker at all: the derived name would be the output
            # file itself, which must not be deleted here.
            continue
        try:
            os.remove(stem + '.e' + suffix)
        except FileNotFoundError:
            # The error file is already gone, which is the desired end state.
            pass

    move_out_job_files_to_stdout(run_file)

    # renaming of stdout dir as otherwise it will get deleted in later
    # move_out_job_files_to_stdout step
    stdout_dir = (os.path.dirname(run_file) + '/stdout_'
                  + os.path.basename(run_file))
    os.rename(stdout_dir, stdout_dir + '_pre_rerun')

    remove_last_job_running_products(run_file)

    queuename = os.getenv('QUEUENAME')
    rerun_dir = os.path.dirname(rerun_file)

    js.submit_batch_jobs(batch_file=rerun_file,
                         out_dir=rerun_dir,
                         work_dir=os.path.dirname(rerun_dir),
                         memory=memory,
                         walltime=new_wall_time,
                         queue=queuename)

    remove_zero_size_or_length_error_files(run_file=rerun_file)
    raise_exception_if_job_exited(run_file=rerun_file)
    concatenate_error_files(run_file=rerun_file,
                            work_dir=os.path.dirname(os.path.dirname(run_file)))
    move_out_job_files_to_stdout(rerun_file)
def main(iargs=None):
    """Execute the run files of a work directory, one after another.

    With ``submit_flag`` set, this script itself is submitted as a job via
    ``js.submit_script`` and the process exits. Otherwise it sleeps for the
    computed wait time, logs the command line, reads the run-file list and
    processes the slice ``[startrun:endrun]`` (an ``endrun`` of 0 means "to
    the end of the list"; a non-zero ``startrun`` is decremented by one before
    slicing).

    When the ``JOBSCHEDULER`` environment variable is ``LSF`` or ``PBS`` each
    run file is submitted with ``js.submit_batch_jobs`` using the memory and
    walltime configured for its step in ``job_defaults.cfg`` (falling back to
    the ``DEFAULT`` section). Otherwise every line of every run file is
    executed with ``os.system``.

    Args:
        iargs: Argument list handed to ``putils.cmd_line_parse``.

    Returns:
        None.
    """
    inps = putils.cmd_line_parse(iargs, script='execute_runfiles')

    os.chdir(inps.work_dir)

    config = putils.get_config_defaults(config_file='job_defaults.cfg')

    job_file_name = 'execute_runfiles'
    job_name = job_file_name

    if inps.wall_time == 'None':
        inps.wall_time = config[job_file_name]['walltime']

    wait_seconds, new_wall_time = putils.add_pause_to_walltime(
        inps.wall_time, inps.wait_time)

    #########################################
    # Submit job
    #########################################

    if inps.submit_flag:
        js.submit_script(job_name, job_file_name, sys.argv[:], inps.work_dir,
                         new_wall_time)
        sys.exit(0)

    time.sleep(wait_seconds)

    invocation = os.path.basename(sys.argv[0]) + ' ' + ' '.join(sys.argv[1:])
    message_rsmas.log(inps.work_dir, invocation)

    run_file_list = putils.read_run_list(inps.work_dir)

    if inps.endrun == 0:
        inps.endrun = len(run_file_list)

    if inps.startrun != 0:
        inps.startrun = inps.startrun - 1

    run_file_list = run_file_list[inps.startrun:inps.endrun]

    if os.getenv('JOBSCHEDULER') in ('LSF', 'PBS'):
        queuename = os.getenv('QUEUENAME')
        run_files_dir = os.path.join(inps.work_dir, 'run_files')

        for item in run_file_list:
            # NOTE(review): the whole path is split, so an underscore in a
            # directory name shifts the index; presumably items look like
            # 'run_files/run_<NN>_<step>' - confirm against read_run_list.
            step_name = '_'.join(item.split('_')[3:])

            try:
                memorymax = config[step_name]['memory']
            except KeyError:
                # Unknown step or missing option: use the defaults.
                memorymax = config['DEFAULT']['memory']

            try:
                if config[step_name]['adjust'] == 'True':
                    walltimelimit = putils.walltime_adjust(
                        inps, config[step_name]['walltime'])
                else:
                    walltimelimit = config[step_name]['walltime']
            except Exception:
                # Best effort: any failure in the lookup or the adjustment
                # falls back to the default walltime. Narrowed from a bare
                # except so KeyboardInterrupt/SystemExit are not swallowed.
                walltimelimit = config['DEFAULT']['walltime']

            putils.remove_last_job_running_products(run_file=item)

            js.submit_batch_jobs(batch_file=item,
                                 out_dir=run_files_dir,
                                 work_dir=inps.work_dir,
                                 memory=memorymax,
                                 walltime=walltimelimit,
                                 queue=queuename)

            putils.remove_zero_size_or_length_error_files(run_file=item)
            putils.raise_exception_if_job_exited(run_file=item)
            putils.concatenate_error_files(run_file=item,
                                           work_dir=inps.work_dir)
            putils.move_out_job_files_to_stdout(run_file=item)

    else:
        # No supported scheduler: run every command of every run file
        # sequentially through the shell.
        for item in run_file_list:
            with open(item, 'r') as f:
                commands = f.readlines()
            for command in commands:
                os.system(command)

    return None
def main(iargs=None):
    """Execute the run files of a work directory, one after another.

    With ``submit_flag`` set, this script itself is submitted as a job via
    ``js.submit_script`` and the process exits. Otherwise it sleeps for the
    computed wait time, reads the run-file list, logs the invocation and
    processes the slice ``[start_run:end_run]`` (an ``end_run`` of 0 means "to
    the end of the list"; a non-zero ``start_run`` is decremented by one
    before slicing).

    When the ``JOBSCHEDULER`` environment variable is one of ``LSF``, ``PBS``
    or ``SLURM`` each run file is submitted with the memory and walltime
    configured for its step in ``job_defaults.cfg`` (falling back to the
    ``DEFAULT`` section). Otherwise every line of every run file is executed
    with ``os.system``.

    Args:
        iargs: Argument list handed to ``putils.cmd_line_parse``; when it is
            None the logged invocation is built from ``sys.argv``.

    Returns:
        None.
    """
    inps = putils.cmd_line_parse(iargs, script='execute_runfiles')

    os.chdir(inps.work_dir)

    config = putils.get_config_defaults(config_file='job_defaults.cfg')

    job_file_name = 'execute_runfiles'
    job_name = job_file_name

    if inps.wall_time == 'None':
        inps.wall_time = config[job_file_name]['walltime']

    wait_seconds, new_wall_time = putils.add_pause_to_walltime(
        inps.wall_time, inps.wait_time)

    #########################################
    # Submit job
    #########################################

    if inps.submit_flag:
        js.submit_script(job_name, job_file_name, sys.argv[:], inps.work_dir,
                         new_wall_time)
        sys.exit(0)

    time.sleep(wait_seconds)

    run_file_list = putils.read_run_list(inps.work_dir)

    if inps.end_run == 0:
        inps.end_run = len(run_file_list)

    if inps.start_run != 0:
        inps.start_run = inps.start_run - 1

    logged_args = iargs if iargs is not None else sys.argv[1:]
    message_rsmas.log(
        inps.work_dir,
        os.path.basename(__file__) + ' ' + ' '.join(logged_args))

    run_file_list = run_file_list[inps.start_run:inps.end_run]

    supported_schedulers = ['LSF', 'PBS', 'SLURM']
    scheduler = os.getenv('JOBSCHEDULER')

    if scheduler in supported_schedulers:
        queuename = os.getenv('QUEUENAME')
        run_files_dir = os.path.join(inps.work_dir, 'run_files')

        for item in run_file_list:
            # NOTE(review): the whole path is split, so an underscore in a
            # directory name shifts the index; presumably items look like
            # 'run_files/run_<NN>_<step>' - confirm against read_run_list.
            step_name = '_'.join(item.split('_')[3:])

            try:
                memorymax = config[step_name]['memory']
            except KeyError:
                # Unknown step or missing option: use the defaults.
                memorymax = config['DEFAULT']['memory']

            # The per-step 'adjust' / putils.walltime_adjust handling was
            # disabled (FA, 26 Dec) because it seemed wrong; the configured
            # walltime is used unmodified.
            try:
                walltimelimit = config[step_name]['walltime']
            except KeyError:
                walltimelimit = config['DEFAULT']['walltime']

            putils.remove_last_job_running_products(run_file=item)

            # 'sge' is not in supported_schedulers, so only SLURM can reach
            # the launcher path from here.
            if scheduler in ['SLURM', 'sge']:
                js.submit_job_with_launcher(batch_file=item,
                                            work_dir=run_files_dir,
                                            memory=memorymax,
                                            walltime=walltimelimit,
                                            queue=queuename)
            else:
                js.submit_batch_jobs(batch_file=item,
                                     out_dir=run_files_dir,
                                     work_dir=inps.work_dir,
                                     memory=memorymax,
                                     walltime=walltimelimit,
                                     queue=queuename)

            # NOTE(review): the original indentation was lost; the
            # post-processing below is assumed to apply to both submission
            # paths - confirm.
            putils.remove_zero_size_or_length_error_files(run_file=item)
            putils.rerun_job_if_exit_code_140(run_file=item)
            putils.raise_exception_if_job_exited(run_file=item)
            putils.concatenate_error_files(run_file=item,
                                           work_dir=inps.work_dir)
            putils.move_out_job_files_to_stdout(run_file=item)

            date_str = datetime.datetime.now().strftime('%Y%m%d:%H%M%S')
            print(date_str + ' * Job {} completed'.format(item))

        # An empty selection has no first/last run file to report; indexing
        # it would raise IndexError.
        if run_file_list:
            date_str = datetime.datetime.now().strftime('%Y%m%d:%H%M%S')
            print(date_str +
                  ' * all jobs from {} to {} have been completed'.format(
                      os.path.basename(run_file_list[0]),
                      os.path.basename(run_file_list[-1])))

    else:
        # No supported scheduler: run every command of every run file
        # sequentially through the shell.
        for item in run_file_list:
            with open(item, 'r') as f:
                commands = f.readlines()
            for command in commands:
                os.system(command)

    return None
def main(iargs=None):
    """Create the ortho/geo rectifying run jobs and submit them.

    Locates the first ``*.wgs84`` DEM under ``<work_dir>/DEM`` (exiting with
    status 1 when there is none), makes sure the geometry directory exists,
    and - unless ``submit_flag`` asks for this script itself to be submitted
    as a job - builds the geo-rectified lat/lon products, merges and
    multilooks them, and submits every run file returned by
    ``make_run_list_amplitude`` as a batch job.

    Args:
        iargs: Argument list handed to ``putils.cmd_line_parse``; when it is
            None the logged invocation is built from ``sys.argv``.

    Returns:
        None.
    """
    inps = putils.cmd_line_parse(iargs)

    inps.geom_masterDir = os.path.join(inps.work_dir, pathObj.geomlatlondir)
    inps.master = os.path.join(inps.work_dir, pathObj.masterdir)

    # An explicit emptiness check replaces the bare except that used to wrap
    # the [0] lookup and would have hidden unrelated errors.
    dem_candidates = glob.glob('{}/DEM/*.wgs84'.format(inps.work_dir))
    if not dem_candidates:
        print('DEM not exists!')
        sys.exit(1)
    inps.dem = dem_candidates[0]

    os.makedirs(inps.geom_masterDir, exist_ok=True)

    config = putils.get_config_defaults(config_file='job_defaults.cfg')

    job_file_name = 'export_ortho_geo'
    job_name = job_file_name

    if inps.wall_time == 'None':
        inps.wall_time = config[job_file_name]['walltime']

    wait_seconds, new_wall_time = putils.add_pause_to_walltime(
        inps.wall_time, inps.wait_time)

    #########################################
    # Submit job
    #########################################

    if inps.submit_flag:
        js.submit_script(job_name, job_file_name, sys.argv[:], inps.work_dir,
                         new_wall_time)
        sys.exit(0)

    time.sleep(wait_seconds)

    logged_args = iargs if iargs is not None else sys.argv[1:]
    message_rsmas.log(
        inps.work_dir,
        os.path.basename(__file__) + ' ' + ' '.join(logged_args))

    demZero = create_demZero(inps.dem, inps.geom_masterDir)

    swathList = ut.getSwathList(inps.master)

    create_georectified_lat_lon(swathList, inps.master, inps.geom_masterDir,
                                demZero)

    merge_burst_lat_lon(inps)

    multilook_images(inps)

    run_file_list = make_run_list_amplitude(inps)

    # Every run file belongs to the same step, so the resource lookup is done
    # once instead of once per run file.
    step_name = 'amplitude_ortho_geo'

    try:
        memorymax = config[step_name]['memory']
    except KeyError:
        # Unknown step or missing option: use the defaults.
        memorymax = config['DEFAULT']['memory']

    try:
        if config[step_name]['adjust'] == 'True':
            # NOTE(review): the execute_runfiles entry points call
            # walltime_adjust(inps, walltime); confirm that the
            # single-argument form is valid. If it is not, the resulting
            # TypeError is absorbed below and the default walltime is used.
            walltimelimit = putils.walltime_adjust(
                config[step_name]['walltime'])
        else:
            walltimelimit = config[step_name]['walltime']
    except Exception:
        # Best effort: any failure in the lookup or the adjustment falls back
        # to the default walltime. Narrowed from a bare except so
        # KeyboardInterrupt/SystemExit are not swallowed.
        walltimelimit = config['DEFAULT']['walltime']

    queuename = os.getenv('QUEUENAME')
    run_files_dir = os.path.join(inps.work_dir, 'run_files')

    for item in run_file_list:
        putils.remove_last_job_running_products(run_file=item)

        js.submit_batch_jobs(batch_file=item,
                             out_dir=run_files_dir,
                             work_dir=inps.work_dir,
                             memory=memorymax,
                             walltime=walltimelimit,
                             queue=queuename)

        putils.remove_zero_size_or_length_error_files(run_file=item)
        putils.raise_exception_if_job_exited(run_file=item)
        putils.concatenate_error_files(run_file=item, work_dir=inps.work_dir)
        putils.move_out_job_files_to_stdout(run_file=item)

    return
def main(iargs=None):
    """Execute the run files of a work directory, one after another.

    After pausing for ``putils.pause_seconds(inps.wait_time)`` seconds, the
    script either submits itself as a job (``submit_flag`` set) and exits, or
    reads the run-file list, logs the invocation and submits the slice
    ``[start_run:end_run]`` one run file at a time (an ``end_run`` of 0 means
    "to the end of the list"; a non-zero ``start_run`` is decremented by one
    before slicing). Post-processing of a run file's job output is done only
    when ``js.submit_batch_jobs`` returns a truthy status.

    Args:
        iargs: Argument list handed to ``putils.cmd_line_parse``; when it is
            None the logged invocation is built from ``sys.argv``.

    Returns:
        None.
    """
    inps = putils.cmd_line_parse(iargs, script='execute_runfiles')

    os.chdir(inps.work_dir)

    time.sleep(putils.pause_seconds(inps.wait_time))

    #########################################
    # Submit job
    #########################################

    if inps.submit_flag:
        job_name = 'execute_runfiles'
        job_file_name = job_name
        js.submit_script(job_name, job_file_name, sys.argv[:], inps.work_dir)
        sys.exit(0)

    run_file_list = putils.read_run_list(inps.work_dir)

    if inps.end_run == 0:
        inps.end_run = len(run_file_list)

    if inps.start_run != 0:
        inps.start_run = inps.start_run - 1

    logged_args = iargs if iargs is not None else sys.argv[1:]
    message_rsmas.log(
        inps.work_dir,
        os.path.basename(__file__) + ' ' + ' '.join(logged_args))

    run_file_list = run_file_list[inps.start_run:inps.end_run]
    run_files_dir = os.path.join(inps.work_dir, 'run_files')

    for item in run_file_list:
        putils.remove_last_job_running_products(run_file=item)

        job_status = js.submit_batch_jobs(batch_file=item,
                                          out_dir=run_files_dir,
                                          work_dir=inps.work_dir)

        if job_status:
            putils.remove_zero_size_or_length_error_files(run_file=item)
            putils.rerun_job_if_exit_code_140(run_file=item)
            putils.raise_exception_if_job_exited(run_file=item)
            putils.concatenate_error_files(run_file=item,
                                           work_dir=inps.work_dir)
            putils.move_out_job_files_to_stdout(run_file=item)

            date_str = datetime.datetime.now().strftime('%Y%m%d:%H%M%S')
            print(date_str + ' * Job {} completed'.format(item))

    # An empty selection has no first/last run file to report; indexing it
    # would raise IndexError.
    if run_file_list:
        date_str = datetime.datetime.now().strftime('%Y%m%d:%H%M%S')
        print(date_str +
              ' * all jobs from {} to {} have been completed'.format(
                  os.path.basename(run_file_list[0]),
                  os.path.basename(run_file_list[-1])))

    return None