import copy
import json
import os
import tempfile

import xarray as xr

from workflow import task_manager as tm


def ncop_chunktime(script, kwargs, chunk_size, start=0, stop=None,
                   clobber=False, cleanup=True):
    '''run script on time segments within a file and concatenate results'''

    jid_list = []

    def op_one_chunk(tnx):
        #-- intermediate output file
        file_out_i = file_out + '.tnx.%d-%d' % tnx

        #-- update input arguments
        kwargs.update({'dimsub': {'time': tnx}, 'file_out': file_out_i})

        #-- submit
        print('\'{0}\''.format(json.dumps(kwargs)))
        if not os.path.exists(file_out_i) or clobber:
            jid = tm.submit([script, '\'{0}\''.format(json.dumps(kwargs))],
                            memory='30GB')
            jid_list.append(jid)

        return file_out_i

    file_out = copy.copy(kwargs['file_out'])

    if not os.path.exists(file_out) or clobber:
        #-- get time chunks
        if stop is None:
            ds = xr.open_dataset(kwargs['file_in'], decode_times=False,
                                 decode_coords=False)
            stop = len(ds.time)
        time_chunks = gen_time_chunks(start, stop, chunk_size)

        #-- operate on each chunk
        file_cat = [op_one_chunk(tnx) for tnx in time_chunks]

        #-- concatenate files
        jid = ncrcat(file_cat, file_out, depjob=jid_list)
        if cleanup:
            tm.submit(['rm', '-f', ' '.join(file_cat)], depjob=[jid])

    return jid_list
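# `gen_time_chunks` is called above but not defined in this section; a minimal
# sketch of the assumed behavior -- yielding (start, stop) index pairs that
# tile [start, stop) in steps of `chunk_size` -- is given below. This is a
# hypothetical reconstruction; the actual helper may differ.
def gen_time_chunks(start, stop, chunk_size):
    '''yield (start, stop) index pairs spanning at most chunk_size steps'''
    for i in range(start, stop, chunk_size):
        yield (i, min(i + chunk_size, stop))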
def ncrcat(input_files, output, depjob=None):
    '''call ncrcat'''
    if depjob is None:
        depjob = []

    #-- write the list of input files to a temporary file
    (fid, tmpfile) = tempfile.mkstemp('.filelist')
    os.close(fid)  # close the raw descriptor from mkstemp; reopen by name
    with open(tmpfile, 'w') as f:
        for fname in input_files:
            f.write('%s\n' % fname)

    #-- pipe the file list into ncrcat
    jid = tm.submit(['cat', tmpfile, '|', 'ncrcat', '-O', '-o', output],
                    depjob=depjob, memory='100GB', modules=['nco'],
                    module_purge=False)
    return jid
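# Example usage (a sketch): the worker script name `proc_script.py` and the
# kwargs entries other than 'file_in' and 'file_out' are hypothetical -- the
# script invoked determines which keys it actually consumes.
if __name__ == '__main__':
    kwargs = {'file_in': 'input.nc',    # dataset to chunk over time
              'file_out': 'output.nc'}  # final concatenated result
    jids = ncop_chunktime('./proc_script.py', kwargs, chunk_size=120)
    tm.wait()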
            print(f'exists: {file_cat_basename}...skipping')
            continue

        logger.info(f'creating {file_cat}')

        varlist = ','.join(static_vars + [v])

        #-- concatenate the file list, then compress in place
        cat_cmd = [f'cat {tmpfile} | ncrcat -O -h -v {varlist} {file_cat}']
        compress_cmd = [f'ncks -O -4 -L 1 {file_cat} {file_cat}']

        if not demo:
            if campaign_transfer:
                #-- transfer to campaign storage; remove the local copy only
                #-- if the transfer succeeded, otherwise fail the job
                xfr_cmd = [f'{script_path}/globus.py',
                           '--src-ep=glade --dst-ep=campaign',
                           '--retry=3',
                           f'--src-paths={file_cat}',
                           f'--dst-paths={campaign_dout}/{file_cat_basename}']
                cleanup_cmd = [f'if [ $? -eq 0 ]; then rm -f {file_cat}; else exit 1; fi']
            else:
                xfr_cmd = []
                cleanup_cmd = []

            jid = tm.submit([cat_cmd, compress_cmd, xfr_cmd, cleanup_cmd],
                            modules=['nco'], memory='100GB')

    print()
    tm.wait()


if __name__ == '__main__':
    main()
from subprocess import call
from workflow import task_manager as tm
import time

# stop script (graceful exit) and wait on jobs after:
tm.QUEUE_MAX_HOURS = 1000. / 3600.

# max number of jobs in queue
tm.MAXJOBS = 15

# number of jobs to submit and wait on
N = 10

# loop and submit
for i in range(0, N + 1):
    time.sleep(0)
    jid = tm.submit([['touch', 'test.file.%03d' % i],
                     ['sleep', '2'],
                     ['echo', 'done']])

print('waiting')
time.sleep(0)

# submit dependent job
jid = tm.submit(['ls', '-1'], depjob=tm.JID)

# wait on all jobs
ok = tm.wait()

for i in range(0, N + 1):
    call(['rm', '-f', 'test.file.%03d' % i])
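# Note: `ok` above is assigned but never checked. Assuming tm.wait() returns a
# truthy value when all jobs succeed (an assumption, not verified here), the
# cleanup loop could be gated on it:
#
#     if ok:
#         for i in range(0, N + 1):
#             call(['rm', '-f', 'test.file.%03d' % i])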
#! /usr/bin/env python
import os
from subprocess import call
from glob import glob

from workflow import task_manager as tm

tm.CONDA_ENV = 'py3_geyser'

script = '/gpfs/u/home/mclong/p/o2-prediction/calcs/proc_cesm_dple.py'

for i in range(10):
    tm.submit(['./proc_cesm_dple.py', '-v', 'O2', '-i', f'{i}'],
              memory='200GB')

tm.wait()