/
pbs_run_methods.py
466 lines (367 loc) · 23.2 KB
/
pbs_run_methods.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
#!/usr/bin/env python
# :noTabs=true:
"""
run methods specifically for submitting jobs in the PBS queuing system
"""
################################################################################
# IMPORT
# common modules
import subprocess
import time
# bigger modules
# custom modules
from vipur_settings import PBS_USER , PBS_ENVIRONMENT_SETUP , PBS_QUEUE_QUOTA , PBS_QUEUE_MONITOR_DELAY , PBS_SERIAL_JOB_OPTIONS , PBS_PARALLEL_JOB_OPTIONS , PBS_BASH_SCRIPT , ROSETTA_ENDING , PBS_PARALLEL_ROSETTA_ENDING , PBS_PARALLEL_ROSETTA_EXECUTION_COMMAND , ROSETTA_RELAX_PARALLEL_OPTIONS
from helper_methods import run_local_commandline , create_executable_str
from pre_processing import *
from run_methods import determine_check_successful_function
from rosetta_feature_generation import remove_intermediate_ddg_monomer_files
from post_processing import *
################################################################################
# PBS RUN METHODS
def run_VIPUR_PBS( pdb_filename = '' , variants_filename = '' ,
        out_path = '' , write_numbering_map = True ,
        single_relax = True , delete_intermediate_relax_files = True ,
        demo = False , rerun_preprocessing = False ):
    """
    Run VIPUR on one input pair, or on every pair in a directory, using PBS

    Pre processing is run (or reused) for every target, each command in the
    resulting task summary is rewritten for PBS and written to its own bash
    script, all scripts are submitted by run_VIPUR_task_summaries_PBS and
    post processing is run on every task summary

    pdb_filename                        a ".pdb" file, any other file (run in
                                        "sequence only" mode) or a directory
                                        to scan, defaults to the current
                                        directory when no input is given
    variants_filename                   the variants file, or the extension
                                        (starting with ".") of the variants
                                        files when scanning a directory
    out_path                            prefix put in front of the input
                                        filenames, defaults to the scanned
                                        directory when scanning
    write_numbering_map , single_relax  forwarded to run_preprocessing
                                        (single_relax is also forwarded to
                                        run_VIPUR_task_summaries_PBS)
    delete_intermediate_relax_files     forwarded to
                                        run_VIPUR_task_summaries_PBS
    demo                                run on the bundled 2C35 example
    rerun_preprocessing                 rerun pre processing even if the
                                        expected task summary file exists

    returns the list of task summaries returned by run_postprocessing
    raises IOError if a directory scan finds no input pairs at all
    """
    # for the example input
    if demo:
        pdb_filename = PATH_TO_VIPUR + '/example_input/2C35.pdb'
        variants_filename = PATH_TO_VIPUR + '/example_input/2C35.txt'
        out_path = PATH_TO_VIPUR + '/example_output'

    # alternatively, run on an entire directory
    if not pdb_filename and not variants_filename:
        # current directory
        print( 'no input provided, assuming you want to run on every (.pdb,.txt) file pair found in the current directory' )
        pdb_filename = os.getcwd()

    if os.path.isdir( pdb_filename ) and not variants_filename:
        # assume variants_filename from pdb_filename
        variants_filename = '.txt'

    if os.path.isdir( pdb_filename ) and variants_filename[0] == '.':
        # "variants_filename" is a file extension, run on the directory
        if not out_path:
            out_path = os.path.abspath( pdb_filename )
        prefix = (out_path +'/')*bool( out_path )
        directory_contents = os.listdir( pdb_filename )

        # sequence files only count when they have a variants file AND no .pdb
        fa_filenames = [prefix + i for i in directory_contents if get_file_extension( i ) == 'fa']
        fa_filenames = [[i , get_root_filename( i ) + variants_filename] for i in fa_filenames
            if os.path.isfile( get_root_filename( i ) + variants_filename ) and
            not os.path.isfile( get_root_filename( i ) + '.pdb' )]

        print( 'running VIPUR on all (.pdb,' + variants_filename + ') file pairs found in ' + pdb_filename )
        # find .pdb files, then look for pairs
        pdb_filenames = [prefix + i for i in directory_contents if get_file_extension( i ) == 'pdb']
        pdb_filenames = [[i , get_root_filename( i ) + variants_filename] for i in pdb_filenames
            if os.path.isfile( get_root_filename( i ) + variants_filename )]

        print( str( len( pdb_filenames ) ) + ' pairs found' )
        print( str( len( fa_filenames ) ) + ' pairs found for sequence only mode' )

        if not pdb_filenames:
            if not fa_filenames:
                raise IOError( '!!! no (.pdb,' + variants_filename + ') file pairs found in ' + pdb_filename + '!!?!\nAND no (.fa,' + variants_filename + ') file pairs were found...' )
            print( '...only (.fa,' + variants_filename + ') file pairs were found, running in sequence only mode' )
    else:
        # normal execution, generalize by turning into list
        prefix = (out_path +'/')*bool( out_path )
        input_pair = [prefix + pdb_filename , prefix + variants_filename]
        pdb_filenames = []
        fa_filenames = []
        if get_file_extension( pdb_filename ) == 'pdb':
            pdb_filenames = [input_pair]
        else:
            # anything else is run in sequence only mode, the pair must be
            # real filenames since the first entry names the output directory
            fa_filenames = [input_pair]

    # combine all "filenames" to run into unified framework
    # each target is [input filename , variants filename , sequence only? , directory to create]
    target_proteins = []
    for i in pdb_filenames:
        target_proteins.append( i + [False , get_root_filename( i[0] ) +'_VIPUR'] )
    for i in fa_filenames:
        target_proteins.append( i + [True , get_root_filename( i[0] ) +'_VIPUR'] )

    # setup environment variables BEFORE pre processing
    # no need to setup a command, just run it
    if PBS_ENVIRONMENT_SETUP:
        print( 'setting up environment variables' )
        run_local_commandline( PBS_ENVIRONMENT_SETUP )

    # pre processing
    task_summaries = []
    for target in target_proteins:
        input_filename , input_variants_filename , sequence_only , this_out_path = target

        # guess what the task summary filename 'would' be, if it exists, keep going...
        task_summary_filename = this_out_path +'/'+ get_root_filename( input_filename ).split( '/' )[-1] + '.task_summary'
        if os.path.isfile( task_summary_filename ) and not rerun_preprocessing:
            # skip running pre-processing BUT DO add it to the list of tasks
            print( 'hmmm, ' + input_filename + ' seems to have run preprocessing already, skipping now' )
        else:
            task_summary_filename = run_preprocessing( input_filename , input_variants_filename ,
                sequence_only = sequence_only , out_path = this_out_path ,
                task_summary_filename = task_summary_filename ,
                write_numbering_map = write_numbering_map , single_relax = single_relax ,
                pymol_environment_setup = PBS_ENVIRONMENT_SETUP )

        # modify for PBS script
        task_summary = load_task_summary( task_summary_filename )
        cd_prefix = ('cd '+ this_out_path +';')*bool( this_out_path )
        for command_dict in task_summary['commands']:
            command = command_dict['command']
            feature = command_dict['feature']

            # relax (but not its rescore) is run with the parallel executable
            if feature.replace( '_native' , '' ) == 'relax' and not 'rescore' in feature:
                # NOTE(review): the original indentation was not recoverable,
                # the execution command and extra options are assumed to share
                # this guard, otherwise a reloaded task summary (rewritten and
                # saved by an earlier run) would get them a second time
                if not PBS_PARALLEL_ROSETTA_ENDING in command:
                    command = command.replace( ROSETTA_ENDING , PBS_PARALLEL_ROSETTA_ENDING )
                    command = PBS_PARALLEL_ROSETTA_EXECUTION_COMMAND + ' '*bool( PBS_PARALLEL_ROSETTA_EXECUTION_COMMAND ) + command
                    if ROSETTA_RELAX_PARALLEL_OPTIONS:
                        command += ' '+ ' '.join( ['-'+ k + (' '+ ROSETTA_RELAX_PARALLEL_OPTIONS[k])*bool( ROSETTA_RELAX_PARALLEL_OPTIONS[k] ) for k in ROSETTA_RELAX_PARALLEL_OPTIONS] )
                # also use the parallel options
                pbs_options = 'parallel'
            else:
                pbs_options = 'serial'

            # put "cd" in front, once, a reloaded command already has it
            if cd_prefix and not command.startswith( cd_prefix ):
                command = cd_prefix + command

            # modify the task summary
            command_dict['command'] = command

            # actually write the script...
            # don't worry about optional #PBS header info
            # the output_filename (plus the feature) identifies the script
            script_filename = this_out_path + '/'*bool( this_out_path ) + get_root_filename( command_dict['output_filename'].split( '/' )[-1] ) +'.'+ feature + '.pbs_script.sh'
            command_dict['script_filename'] = script_filename
            with open( script_filename , 'w' ) as f:
                f.write( PBS_BASH_SCRIPT( command ) )

            # only record which option set to use, the qsub call itself is
            # built at submission time (it uses ":" and "," characters...)
            command_dict['queue'] = pbs_options

        # rewrite the task summary
        write_task_summary( task_summary , task_summary_filename )
        task_summaries.append( task_summary )

    # run them all
    run_VIPUR_task_summaries_PBS( task_summaries , single_relax = single_relax , delete_intermediate_relax_files = delete_intermediate_relax_files )

    # post processing
    # always okay to rerun post processing...should not make any difference
    for index , target in enumerate( target_proteins ):
        print( '\n\n\nExtracting and Analyzing the Results:\n\n' )
        task_summaries[index] = run_postprocessing( task_summaries[index] , sequence_only = target[2] )

    return task_summaries
# different from serial method
# collect up all jobs at once, fire off serially (lol) to queue
# need to be cognizant of queue status, rescore must occur later
def run_VIPUR_task_summaries_PBS( task_summaries , single_relax = True , delete_intermediate_relax_files = True ):
    """
    Submit every command of every task summary to PBS, rescore commands last

    task_summaries                      list of task summaries, an entry that
                                        is the name of an existing file is
                                        replaced (in the list) by the task
                                        summary loaded from it
    single_relax                        forwarded to run_VIPUR_tasks_PBS
    delete_intermediate_relax_files     forwarded to run_VIPUR_tasks_PBS

    returns nothing, the task summaries are updated in place
    """
    # queue command can be derived from task itself
    # this method will run...until all jobs are complete
    for index , task_summary in enumerate( task_summaries ):
        if isinstance( task_summary , str ) and os.path.isfile( task_summary ):
            task_summaries[index] = load_task_summary( task_summary )

    # make a list of ALL jobs instead of per protein tasks
    # each job is (index of the task summary , index of the command)
    non_rescore_tasks = []
    rescore_tasks = []
    for index , task_summary in enumerate( task_summaries ):
        for command_index , command_dict in enumerate( task_summary['commands'] ):
            if 'rescore' in command_dict['feature']:
                rescore_tasks.append( (index , command_index) )
            else:
                non_rescore_tasks.append( (index , command_index) )

    # pass the options on, otherwise the defaults of run_VIPUR_tasks_PBS
    # silently replace whatever the caller asked for
    run_VIPUR_tasks_PBS( task_summaries , non_rescore_tasks ,
        single_relax = single_relax , delete_intermediate_relax_files = delete_intermediate_relax_files )

    # only rescore once ALL the other tasks are complete
    if rescore_tasks:
        run_VIPUR_tasks_PBS( task_summaries , rescore_tasks ,
            single_relax = single_relax , delete_intermediate_relax_files = delete_intermediate_relax_files )
# return anything?
# task summaries should be updated with all the necessary files...
# for i in xrange( len( task_summaries ) ):
# tasks will check if the task summary indicates they have already been run
# task_summaries[i] = run_task_commands_serially( task_summaries[i] , single_relax = single_relax , delete_intermediate_relax_files = delete_intermediate_relax_files )
# submit jobs until complete or too many attempts
def _pbs_write_task_summaries( task_summaries , verbose = False ):
    # write every task summary to the filename it records for itself
    for task_summary in task_summaries:
        if not 'task_summary_filename' in task_summary['filenames'].keys():
            raise NotImplementedError( 'should input the task summary filename (not the summary itself)...' )
        if verbose:
            print( 'updating: ' + task_summary['filenames']['task_summary_filename'] )
        write_task_summary( task_summary , task_summary['filenames']['task_summary_filename'] )

def _pbs_interpret_check_result( success ):
    # turn the result of a "check successful" function into (complete , details)
    if isinstance( success , bool ):
        return success , ''
    if isinstance( success , (list , tuple) ) and len( success ) > 1 and isinstance( success[0] , bool ):
        return success[0] , ' '+ ';'.join( [str( j ) for j in success[1:]] ) +' '
    # cannot be interpreted, count it as not complete instead of reusing
    # whatever the previously assessed job left behind
    return False , ' unrecognized check result: '+ str( success ) +' '

def _pbs_merge_relax_output_for_rescore( task_summary , command_dict , task , single_relax , delete_intermediate_relax_files ):
    # combine the separate relax outputs of a variant before it is rescored
    silent_filenames = [j['output_filename'] for j in task_summary['commands'] if
        j['feature'].replace( '_native' , '' ) == 'relax' and
        j['variant'] == command_dict['variant'] and
        'run' in j.keys() and
        'success' in j['run']
        ]

    # identify the combined filenames, which variant is this?
    target_variant = [j for j in task_summary['variants'].keys() if
        j.split( '_' )[-1] == command_dict['variant'] and j.split( '_' )[0] in command_dict['command']]
    if not target_variant:
        # its native
        combined_silent_filename = task_summary['other']['combined_native_silent_filename']
        combined_score_filename = task_summary['other']['combined_native_score_filename']
    elif len( target_variant ) > 1:
        raise Exception( '??? found more than on matching variant ???\n' + ', '.join( target_variant ) )
    else:
        # found it
        combined_silent_filename = task_summary['variants'][target_variant[0]]['combined_silent_filename']
        combined_score_filename = task_summary['variants'][target_variant[0]]['combined_score_filename']

    # be sure the relax files have not already been merged
    # with a single relax the output filename is correct as is
    if not single_relax and not os.path.isfile( combined_silent_filename ):
        if not len( silent_filenames ) == ROSETTA_RELAX_OPTIONS['nstruct']:
            raise Exception( '??? somehow the matching relax run(s) has failed ???\n' + str( task ) )
        score_filenames = [j.replace( '.silent' , '.sc' ) for j in silent_filenames]
        merge_rosetta_relax_output( silent_filenames , combined_silent_filename , score_filenames , combined_score_filename , delete_old_files = delete_intermediate_relax_files )

# submit jobs until complete or too many attempts
def run_VIPUR_tasks_PBS( task_summaries , task_list , max_pbs_tries = 2 , ddg_monomer_cleanup = True , single_relax = True , delete_intermediate_relax_files = False ):
    """
    Submit the tasks in  task_list  to PBS and monitor them until all are done

    Every round the queue is read with get_pbs_queue_status, new jobs are
    submitted with "qsub" while there is room under PBS_QUEUE_QUOTA, jobs
    reported as "C" are assessed and the task summaries are rewritten,
    then the monitor sleeps for PBS_QUEUE_MONITOR_DELAY seconds

    The outcome is recorded in the "run" entry of each command: "success",
    the number of attempts so far, or "<n> tries;failure <details>"

    task_summaries                      list of loaded task summaries
    task_list                           (task summary index , command index)
                                        pairs selecting the commands to run
    max_pbs_tries                       attempts before a command is recorded
                                        as a failure
    ddg_monomer_cleanup                 call
                                        remove_intermediate_ddg_monomer_files
                                        after every "ddg_monomer" job
    single_relax                        if False, separate relax outputs are
                                        merged before a rescore is submitted
    delete_intermediate_relax_files     passed to merge_rosetta_relax_output
                                        as  delete_old_files

    returns nothing, the task summaries are updated in place and on file
    raises NotImplementedError if a task summary does not record its filename
    """
    # tasks with a recorded outcome are finished, this includes failures,
    # these are never submitted again so they must not be waited for
    completed = [i for i in task_list if
        'run' in task_summaries[i[0]]['commands'][i[1]] and
        ('success' in task_summaries[i[0]]['commands'][i[1]]['run'] or
        'failure' in task_summaries[i[0]]['commands'][i[1]]['run'])]

    running_or_queued = {}    # job id -> task, only jobs submitted here
    rounds = 0
    all_completed_jobs = set()    # prevents annoying bulk output, only see it the first time it completes
    while not len( completed ) == len( task_list ):
        rounds += 1
        print( '\n\nQUEUE MONITOR ROUND ' + str( rounds ) )

        # check queue status
        queue_status = get_pbs_queue_status()

        # ignore "C"ompleted jobs, "R"unning job quota are not set by us...
        # if your queue system does not have a separate "R"un quota, remove 'R' from the below!
        queue_space_occupied = len( [i for i in queue_status.values() if not i in ['C' , 'R']] )
        available_space = PBS_QUEUE_QUOTA - queue_space_occupied

        # launch next jobs in available slots
        # must be positive, a negative count would still slice off (and
        # submit) most of the waiting jobs
        if available_space > 0:
            print( str( queue_space_occupied ) + ' jobs queued or running, could submit up to ' + str( available_space ) + ' more' )

            # choose the next jobs
            jobs_to_run = [i for i in task_list if
                not i in completed and
                not i in running_or_queued.values() and
                not ( 'run' in task_summaries[i[0]]['commands'][i[1]] and
                    ('success' in task_summaries[i[0]]['commands'][i[1]]['run'] or
                    'failure' in task_summaries[i[0]]['commands'][i[1]]['run']) )
                ]
            print( str( len( jobs_to_run ) ) + ' jobs still need to finish (after the currently running jobs complete)' )

            # only the next few
            for task in jobs_to_run[:available_space]:
                command_dict = task_summaries[task[0]]['commands'][task[1]]

                # if its a rescore and relax jobs were separated, need to recombine them!
                if 'rescore' in command_dict['feature']:
                    _pbs_merge_relax_output_for_rescore( task_summaries[task[0]] , command_dict , task , single_relax , delete_intermediate_relax_files )

                # submit the script (written during pre processing) using a queue command
                pbs_options = {}
                if command_dict['queue'] == 'parallel':
                    pbs_options.update( PBS_PARALLEL_JOB_OPTIONS )
                elif command_dict['queue'] == 'serial':
                    pbs_options.update( PBS_SERIAL_JOB_OPTIONS )

                # options given as functions are derived from the script filename
                script_filename = command_dict['script_filename']
                for k in list( pbs_options ):
                    if callable( pbs_options[k] ):
                        pbs_options[k] = pbs_options[k]( script_filename )

                pbs_command = create_executable_str( 'qsub' , [script_filename] , pbs_options )
                new_job_id = run_local_commandline( pbs_command , collect_stdout = True )
                # keep the leading part only, this is how the queue status is keyed
                new_job_id = new_job_id.strip().split( '.' )[0]
                print( 'submitted ' + new_job_id )

                # save the job id, assume its queued
                running_or_queued[new_job_id] = task
        else:
            print( 'no new "positions" are available' )

        # debug, need to know
        running_jobs = len( [i for i in queue_status.values() if i in ['R']] )
        if running_jobs:
            print( str( running_jobs ) + ' are still running...(excluding the jobs just submitted and including your other jobs)' )

        # assess outcome of completed jobs
        # NOTE(review): job ids are strings here, so this is not a numerical sort
        for job_id in sorted( queue_status.keys() ):
            if not job_id in all_completed_jobs:
                print( '\t'+ job_id +' '+ queue_status[job_id] )

            if queue_status[job_id] == 'C' and job_id in running_or_queued:
                task = running_or_queued[job_id]
                command_dict = task_summaries[task[0]]['commands'][task[1]]

                check_successful = determine_check_successful_function( command_dict , single_relax = single_relax )
                complete , failure_summary = _pbs_interpret_check_result( check_successful( command_dict ) )
                print( ' '.join( [str( complete ) , failure_summary , 'try again?'*bool( not complete )] ) )    # debug

                # earlier attempts are recorded as a plain number
                tries = 0
                if 'run' in command_dict.keys() and command_dict['run'] and not 'success' in command_dict['run'] and not 'failure' in command_dict['run']:
                    tries = int( command_dict['run'] )
                tries += 1
                print( str( tries ) +' attempts so far' )    # debug

                if tries >= max_pbs_tries:
                    # last attempt, whatever happened is final
                    print( job_id + ' completed successfully'*complete + (' failed with ' + str( tries ) + ' attempts')*(not complete) )
                    failure_summary = 'success'*complete + (str( tries ) +' tries;failure ' + failure_summary)*(not complete)
                elif complete:
                    print( job_id + ' completed successfully' )
                    failure_summary = 'success'
                else:
                    # record the number of tries
                    print( job_id + ' completed' + ' successfully'*complete )
                    failure_summary = str( tries )

                # update the record
                print( 'updating with: ' + failure_summary )    # debug
                command_dict['run'] = failure_summary

                # optionally cleanup
                if ddg_monomer_cleanup and command_dict['feature'] == 'ddg_monomer':
                    print( 'ddg_monomer writes useless output files, deleting these now...' )
                    remove_intermediate_ddg_monomer_files()

                # only a final outcome counts as completed, a failed job with
                # attempts left is picked up again as a job to run
                if complete or tries >= max_pbs_tries:
                    completed.append( task )
                del running_or_queued[job_id]

            if queue_status[job_id] == 'C':
                all_completed_jobs.add( job_id )    # prevent redundant update info

        # update task_summaries e.g. write them!
        _pbs_write_task_summaries( task_summaries , verbose = True )

        # pause...unless everything just finished
        print( '\n'+ str( len( completed ) ) +' of '+ str( len( task_list ) ) +' tasks completed' )    # debug
        if len( completed ) < len( task_list ):
            print( 'waiting ' + str( PBS_QUEUE_MONITOR_DELAY ) +'s...' )
            time.sleep( PBS_QUEUE_MONITOR_DELAY )

    # write one last time
    _pbs_write_task_summaries( task_summaries )
################################################################################
# PBS INTERACTION METHODS
# ASSUMES it is the ONLY application submitting jobs as this user...
# simple parse/scan the queue, use qstat -u
def get_pbs_queue_status( user = PBS_USER , header_lines = 5 , trailer_lines = 1 , only_job_status = True ):
    """
    Run "qstat" and return the parsed queue listing

    user                "qstat -u <user>" is run if given, else plain "qstat"
    header_lines        leading output lines to skip
                        = 2 for FULL queue, = 5 for USER queue ("-u")
    trailer_lines       trailing output lines to skip
    only_job_status     if True, return a dict of  job id -> status  where the
                        job id is the first column up to its first "." and the
                        status is the second to last column
                        if False, return the list of rows, each row the list
                        of its whitespace separated columns
    """
    command = 'qstat'
    if user:
        command += ' -u ' + user
    queue_info = run_local_commandline( command , collect_stdout = True ) or ''

    # simple parsing
    # the end is computed explicitly, a slice ending at "-0" would be empty
    lines = queue_info.split( '\n' )
    lines = lines[header_lines:max( len( lines ) - trailer_lines , 0 )]
    # blank lines carry no job
    rows = [i.split() for i in lines if i.strip()]

    # optionally only report the job statuses
    if only_job_status:
        # job ids should be unique...
        # rows without at least two columns have no status to report
        return dict( [(i[0].split( '.' )[0] , i[-2]) for i in rows if len( i ) > 1] )
    return rows
# primarily concerned with job status only
# need 1st and 2nd to last columns