def run_probe( pdb_filename , variants , probe_output_filename = '' , run = True ):
    """
    Runs PROBE on  <pdb_filename>  on the positions found among  <variants>
    using the default options in PROBE_OPTIONS and writes the output to
    <probe_output_filename>  (also returns this output filename)
    """
    if not probe_output_filename:
        # note: str.rstrip strips a *character set*, not a suffix, so rstrip( '.pdb' ) would be wrong here
        probe_output_filename = os.path.abspath( pdb_filename ).replace( '.pdb' , '' ) + '.probe_out'

    # get the unique variant positions
    positions = list( set( [i[1:-1] for i in variants] ) )
    positions.sort()
    
    # generate the commands to run
#    command = '#!/bin/sh\nrm ' + probe_output_filename + '\ntouch ' + probe_output_filename + '\n'
    # delete any prior copy since we will append to it ("-f" so a missing file is not an error)
    command = 'rm -f ' + probe_output_filename + ';touch ' + probe_output_filename + ';'
    
    for i in positions:
        probe_options = {}
        probe_options.update( PROBE_OPTIONS )
            
        probe_options['out'] = pdb_filename
        probe_options['Q'] = str( i )

        command += create_executable_str( PATH_TO_PROBE , [] , probe_options , probe_output_filename , append = True ) + ';'

    # run PROBE, store the output
    if run:
        run_local_commandline( command )

        return probe_output_filename , positions
    else:
        # return the command itself, plus the output filename and positions
        return command , probe_output_filename , positions
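# a minimal sketch (not part of the original module) of the variant format
# run_probe assumes: strings like 'A123G' (wild-type residue, position,
# mutant residue), so i[1:-1] extracts the position
def example_variant_positions( variants = ('A123G' , 'A123T' , 'R45Q') ):
    # illustrative only: returns the unique positions, e.g. ['123' , '45']
    positions = list( set( [i[1:-1] for i in variants] ) )
    positions.sort()    # note: lexicographic order, since positions are strings
    return positions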
def run_psiblast( sequence_filename , run = True ):
    """
    Runs PSIBLAST on  <sequence_filename>  using the default options in
    PSIBLAST_OPTIONS and returns the relevant output file: "out_ascii_pssm"
    """
    # note: str.rstrip strips a *character set*, not a suffix
    root_filename = os.path.abspath( sequence_filename ).replace( '.fa' , '' )
    
    # collect the options, set the input, derive the output filenames
    psiblast_options = {}
    psiblast_options.update( PSIBLAST_OPTIONS )
    psiblast_options['query'] = sequence_filename
    for i in psiblast_options.keys():
        if '__call__' in dir( psiblast_options[i] ):
            psiblast_options[i] = psiblast_options[i]( root_filename )

    for i in psiblast_options.keys():
        if isinstance( psiblast_options[i] , str ) and os.path.isfile( psiblast_options[i] ):
            psiblast_options[i] = os.path.abspath( psiblast_options[i] )
    
    command = create_executable_str( PATH_TO_PSIBLAST , args = [] , options = psiblast_options )

    if run:
        run_local_commandline( command )
    
        # the only output we need
        return psiblast_options['out_ascii_pssm']
    else:
        # just send the command
        return command , psiblast_options['out_ascii_pssm']
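# the option dictionaries used throughout (PSIBLAST_OPTIONS etc.) mix literal
# values with callables that derive output filenames from the input root; the
# "'__call__' in dir( ... )" loops above resolve them
# a hypothetical example of that convention (these keys and values are
# illustrative, NOT the real defaults):
EXAMPLE_PSIBLAST_OPTIONS = {
    'evalue' : '1e-3' ,    # literal option, passed through unchanged
    'out_ascii_pssm' : lambda root_filename : root_filename + '.pssm' ,    # resolved per input
    }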
def run_rosetta_relax_local( pdb_filename , extra_options = {} , run = True ):
    root_filename = os.path.abspath( pdb_filename ).replace( '.pdb' , '' )
    
    # collect the options, set the input, derive the output filenames
    relax_options = {}
    relax_options.update( ROSETTA_RELAX_OPTIONS )
    relax_options.update( extra_options )
    relax_options['s'] = pdb_filename
    relax_options['native'] = pdb_filename    # required to get gdtmm scores
    for i in relax_options.keys():
        if '__call__' in dir( relax_options[i] ):
            relax_options[i] = relax_options[i]( root_filename )

    for i in relax_options.keys():
        if isinstance( relax_options[i] , str ) and os.path.isfile( relax_options[i] ):
            relax_options[i] = os.path.abspath( relax_options[i] )

    # ...weird Rosetta append behavior...
    if os.path.isfile( relax_options['out:file:silent'] ):
        os.remove( relax_options['out:file:silent'] )
    if os.path.isfile( relax_options['out:file:scorefile'] ):
        os.remove( relax_options['out:file:scorefile'] )
    
    command = create_executable_str( PATH_TO_ROSETTA_RELAX , args = [] , options = relax_options )

    if run:
        run_local_commandline( command )
    
        # the only output we need
        return relax_options['out:file:silent']
    else:
        return command , relax_options['out:file:silent']
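# usage sketch (hypothetical filename): with  run = False  these wrappers only
# build the shell command instead of executing it, so a scheduler can submit
# it later
def example_preview_relax( pdb_filename = 'example.pdb' ):
    command , silent_filename = run_rosetta_relax_local( pdb_filename , run = False )
    return command , silent_filename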
def run_rosetta_ddg_monomer(pdb_filename,
                            mut_filename,
                            out_filename='',
                            out_path='',
                            cleanup=True,
                            run=True):
    # note: str.rstrip strips a *character set*, not a suffix
    root_filename = os.path.abspath(pdb_filename).replace('.pdb', '')
    # hardcoded...ddg_monomer is such a painful protocol...
    out_filename = ''  # note: the out_filename argument is ignored; the protocol hardcodes its output name
    if '/' in root_filename:
        out_filename += '/'.join(root_filename.split('/')[:-1]) + '/'
    out_filename += 'ddg_predictions.out'
    # clear it out if it exists, otherwise it will be appended to...
    if os.path.exists(out_filename):
        os.remove(out_filename)

    # collect the options, set the input, derive the output filenames
    ddg_monomer_options = {}
    ddg_monomer_options.update(ROSETTA_DDG_MONOMER_OPTIONS)
    ddg_monomer_options['in:file:s'] = pdb_filename
    ddg_monomer_options['ddg::mut_file'] = mut_filename
    for i in ddg_monomer_options.keys():
        if '__call__' in dir(ddg_monomer_options[i]):
            ddg_monomer_options[i] = ddg_monomer_options[i](root_filename)

    for i in ddg_monomer_options.keys():
        if isinstance(ddg_monomer_options[i], str) and os.path.isfile(
                ddg_monomer_options[i]):
            ddg_monomer_options[i] = os.path.abspath(ddg_monomer_options[i])

    command = ''
    # optionally move into the specific directory...
    if out_path:
        command += 'cd ' + out_path + '; '

    command += create_executable_str(PATH_TO_ROSETTA_DDG_MONOMER,
                                     args=[],
                                     options=ddg_monomer_options)

    if run:
        run_local_commandline(command)

        # optionally cleanup
        if cleanup:
            print 'ddg_monomer writes useless output files, deleting these now...'
            remove_intermediate_ddg_monomer_files()

        # the only output we need
        return out_filename
    else:
        return command, out_filename
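# a sketch of the hardcoded output-path logic above using os.path, assuming
# ddg_monomer always writes "ddg_predictions.out" next to the input PDB
def example_ddg_output_filename(pdb_filename):
    # equivalent to the '/'-splitting above, but portable
    return os.path.join(os.path.dirname(os.path.abspath(pdb_filename)),
                        'ddg_predictions.out')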
def run_rosetta_rescore(silent_filename,
                        native_filename,
                        score_filename='',
                        run=True):
    """
    Performs extraction of individual PDB structures from  <silent_filename>
    to  <out_dir>  (default to current location) using the "score" protocol
    of Rosetta (built against 3.5)
    
    Optionally specify  <extra_options>
    """
    root_filename = os.path.abspath(silent_filename).rstrip('.silent')

    score_options = {}
    score_options.update(ROSETTA_SCORE_OPTIONS)
    score_options['in:file:silent'] = silent_filename
    score_options[
        'in:file:native'] = native_filename  # required to get gdtmm scores
    for i in score_options.keys():
        if '__call__' in dir(score_options[i]):
            score_options[i] = score_options[i](root_filename)

    # necessary...
    if 'out:file:scorefile' in score_options.keys(
    ) and not 'rescore.sc' in score_options['out:file:scorefile']:
        score_options['out:file:scorefile'] = score_options[
            'out:file:scorefile'].replace('.sc', '_rescore.sc')

    for i in score_options.keys():
        if isinstance(score_options[i], str) and os.path.isfile(
                score_options[i]):
            score_options[i] = os.path.abspath(score_options[i])

    # ...weird Rosetta append behavior...
    if os.path.isfile(score_options['out:file:scorefile']):
        os.remove(score_options['out:file:scorefile'])

    # default options
    command = create_executable_str(PATH_TO_ROSETTA_SCORE,
                                    args=[],
                                    options=score_options)

    if run:
        run_local_commandline(command)

        return score_options['out:file:scorefile']
    else:
        return command, score_options['out:file:scorefile']
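# the scorefile rename above keeps rescoring from appending to the score file
# produced by relax; a minimal illustration of the transformation
def example_rescore_scorefile_name(score_filename='example.sc'):
    # 'example.sc' -> 'example_rescore.sc'; already-renamed files pass through
    if 'rescore.sc' not in score_filename:
        score_filename = score_filename.replace('.sc', '_rescore.sc')
    return score_filename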
def run_VIPUR_tasks_PBS(task_summaries,
                        task_list,
                        max_pbs_tries=2,
                        ddg_monomer_cleanup=True,
                        single_relax=True,
                        delete_intermediate_relax_files=False):
    # run the non_rescore tasks
    completed = [
        i for i in task_list if 'run' in task_summaries[i[0]]['commands'][i[1]]
        and 'success' in task_summaries[i[0]]['commands'][i[1]]['run']
    ]
    # should running_or_queued be saved? written to file?
    running_or_queued = {}
    rounds = 0
    all_completed_jobs = []  # prevents annoying bulk output, only see it the first time it completes
    while not len(completed) == len(task_list):
        rounds += 1
        print '\n\nQUEUE MONITOR ROUND ' + str(rounds)

        # debug
        #        print running_or_queued

        # check queue status
        queue_status = get_pbs_queue_status()

        # update "running_or_queued" list (?)
        # err, no, does not have information on which job it is...:(
        #for i in queue_status.keys():
        #    if queue_status[i] in ['R' , 'Q']:

        queue_space_occupied = len([
            i for i in queue_status.values() if not i in ['C', 'R']
        ])  # ignore "C"ompleted jobs, "R"unning job quota are not set by us...
        # if your queue system does not have a separate "R"un quota, remove 'R' from the above!
        available_space = PBS_QUEUE_QUOTA - queue_space_occupied

        # launch next jobs in available slots
        if available_space:
            print str(queue_space_occupied
                      ) + ' jobs queued or running, could submit up to ' + str(
                          available_space) + ' more'
            # choose the next job
            jobs_to_run = [
                i for i in task_list
                if not i in completed and not i in running_or_queued.values()
                and not ('run' in task_summaries[i[0]]['commands'][i[1]] and
                         ('success' in task_summaries[i[0]]['commands'][
                             i[1]]['run'] or 'failure' in task_summaries[i[0]]
                          ['commands'][i[1]]['run']))
            ]
            print str(
                len(jobs_to_run)
            ) + ' jobs still need to finish (after the currently running jobs complete)'

            # only the next few
            for i in jobs_to_run[:available_space]:
                command_dict = task_summaries[i[0]]['commands'][i[1]]

                # write scripts as part of pre processing?...yeah...
                # write the command to a script
                #script_filename = command_dict['out_path'] +'/'*bool( command_dict['out_path'] )+
                #script_filename = command_dict['script_filename']

                # if it's a rescore and relax jobs were separated, need to recombine them!
                if 'rescore' in command_dict['feature']:
                    # combine the individual relax runs
                    #relax_commands = [i for i in task_summary['commands'] if i['feature'].replace( '_native' , '' ) == 'relax']
                    #silent_filenames = [j['output_filename'] for j in relax_commands if j['variant'] == i['variant'] and 'run' in j.keys() and j['run'] == 'success']
                    silent_filenames = [
                        j['output_filename']
                        for j in task_summaries[i[0]]['commands']
                        if j['feature'].replace('_native', '') == 'relax'
                        and j['variant'] == command_dict['variant']
                        and 'run' in j.keys() and 'success' in j['run']
                    ]
                    # actually need to identify the combined_silent_filename, be sure the relax files have not already been merged
                    # which variant
                    target_variant = [
                        j for j in task_summaries[i[0]]['variants'].keys()
                        if j.split('_')[-1] == command_dict['variant']
                        and j.split('_')[0] in command_dict['command']
                    ]
                    if not target_variant:
                        # it's native
                        combined_silent_filename = task_summaries[
                            i[0]]['other']['combined_native_silent_filename']
                        combined_score_filename = task_summaries[
                            i[0]]['other']['combined_native_score_filename']
                    elif len(target_variant) > 1:
                        raise Exception(
                            '??? found more than one matching variant ???\n' +
                            ', '.join(target_variant))
                    else:
                        # found it
                        combined_silent_filename = task_summaries[
                            i[0]]['variants'][
                                target_variant[0]]['combined_silent_filename']
                        combined_score_filename = task_summaries[
                            i[0]]['variants'][
                                target_variant[0]]['combined_score_filename']

                    #if not single_relax:    # AND post processing has not already been run...scan for the combined silent file
                    if not single_relax and not os.path.isfile(
                            combined_silent_filename):
                        # note: nstruct may be stored as a string
                        if not len(silent_filenames) == int(
                                ROSETTA_RELAX_OPTIONS['nstruct']):
                            raise Exception(
                                '??? somehow the matching relax run(s) has failed ???\n'
                                + str(i))
                        score_filenames = [
                            j.replace('.silent', '.sc')
                            for j in silent_filenames
                        ]

                        merge_rosetta_relax_output(
                            silent_filenames,
                            combined_silent_filename,
                            score_filenames,
                            combined_score_filename,
                            delete_old_files=delete_intermediate_relax_files)
                        # rescore already knows the proper filename
                    else:
                        # just a single match for each
                        # output filename should be correct as is :)
                        pass

                # submit this script using a queue command
                # generate it here instead
                pbs_options = {}
                if command_dict['queue'] == 'parallel':
                    pbs_options.update(PBS_PARALLEL_JOB_OPTIONS)
                elif command_dict['queue'] == 'serial':
                    pbs_options.update(PBS_SERIAL_JOB_OPTIONS)
                # make sure they are satisfied
                script_filename = command_dict['script_filename']
                for k in pbs_options.keys():
                    if '__call__' in dir(pbs_options[k]):
                        pbs_options[k] = pbs_options[k](script_filename)

                pbs_command = create_executable_str('qsub', [script_filename],
                                                    pbs_options)
                new_job_id = run_local_commandline(pbs_command,
                                                   collect_stdout=True)
                new_job_id = new_job_id.strip()
                if '.' in new_job_id:
                    new_job_id = new_job_id[:new_job_id.find('.')]
                print 'submitted ' + new_job_id

                # save the job id
                # assume it's queued
                running_or_queued[new_job_id] = i

        else:
            print 'no queue slots are available for new jobs'

        # debug, need to know
        running_jobs = len([i for i in queue_status.values() if i in ['R']])
        if running_jobs:
            print str(
                running_jobs
            ) + ' are still running...(excluding the jobs just submitted and including your other jobs)'

        # assess outcome of completed jobs
        for job_id in sorted(
                queue_status.keys()):  # note: job ids are strings, so this is lexicographic order
            # debug
            if not job_id in all_completed_jobs:
                print '\t' + job_id, queue_status[
                    job_id]  # , job_id in running_or_queued.keys()
                # could just skip it all now?

            if queue_status[
                    job_id] == 'C' and job_id in running_or_queued.keys():
                task_id = running_or_queued[job_id][0]
                command_index = running_or_queued[job_id][1]
                command_dict = task_summaries[task_id]['commands'][
                    command_index]

                check_successful = determine_check_successful_function(
                    command_dict, single_relax=single_relax)

                success = check_successful(command_dict)

                failure_summary = ''
                complete = False  # guard: check_successful may return an unexpected shape
                if isinstance(success, bool):
                    complete = success
                elif len(success) > 1 and isinstance(success[0], bool):
                    complete = success[0]
                    failure_summary += ' ' + ';'.join(
                        [str(j) for j in success[1:]]) + ' '
                    print complete, failure_summary, 'try again?' * bool(
                        not complete)  # debug

                # track the number of attempts?
                # try until failure - how many times?
                tries = 0
                if 'run' in command_dict.keys(
                ) and command_dict['run'] and not 'success' in command_dict[
                        'run'] and not 'failure' in command_dict['run']:
                    tries = int(command_dict['run'])
                tries += 1
                print tries, 'attempts so far'  # debug

                if tries >= max_pbs_tries:
                    # it's a failure
                    print job_id + ' completed successfully' * complete + (
                        ' failed with ' + str(tries) +
                        ' attempts') * (not complete)
                    failure_summary = 'success' * complete + (
                        str(tries) + ' tries;failure ' +
                        failure_summary) * (not complete)
                elif complete:
                    print job_id + ' completed successfully'
                    failure_summary = 'success'  #+ str( tries ) + ' tries'
                else:
                    # record the number of tries
                    print job_id + ' completed' + ' successfully' * complete
                    failure_summary = str(tries)

                # update the record
                print 'updating with: ' + failure_summary  # debug
                task_summaries[task_id]['commands'][command_index][
                    'run'] = failure_summary

                # optionally cleanup
                if ddg_monomer_cleanup and command_dict[
                        'feature'] == 'ddg_monomer':  #'ddg' in i['output_filename']:
                    print 'ddg_monomer writes useless output files, deleting these now...'
                    remove_intermediate_ddg_monomer_files()

                # jobs that have since been completed - consider them complete?
                completed.append(
                    running_or_queued[job_id])  # good, so this grows
                del running_or_queued[job_id]
                # remove jobs to run?
#                print 'updating the status...'    # debug

            # write out "completed"? or "running_or_queued"?

            if queue_status[job_id] == 'C' and not job_id in all_completed_jobs:
                all_completed_jobs.append(
                    job_id)  # prevent redundant update info

        # update task_summaries e.g. write them!
        # modified: so the task summary records its own name...bah!
        for i in task_summaries:
            if not 'task_summary_filename' in i['filenames'].keys():
                raise NotImplementedError(
                    'should input the task summary filename (not the summary itself)...'
                )
            else:
                # write it out
                print 'updating: ' + i['filenames']['task_summary_filename']
                write_task_summary(i, i['filenames']['task_summary_filename'])

        # pause...
        print '\n', len(completed), 'of', len(
            task_list), 'tasks completed'  # debug
        if len(completed) < len(task_list):  # no need for an edge-case end wait
            print 'waiting ' + str(PBS_QUEUE_MONITOR_DELAY) + 's...'
            time.sleep(PBS_QUEUE_MONITOR_DELAY)

    # return anything?
    # write one last time?
    for i in task_summaries:
        if not 'task_summary_filename' in i['filenames'].keys():
            raise NotImplementedError(
                'should input the task summary filename (not the summary itself)...'
            )
        else:
            # write it out
            write_task_summary(i, i['filenames']['task_summary_filename'])
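# get_pbs_queue_status (defined elsewhere) is assumed to return a dict mapping
# job id -> single-letter state ('Q'ueued, 'R'unning, 'C'ompleted, ...)
# a hypothetical sketch parsing default "qstat" output; the column layout
# varies between PBS flavors, so treat this as illustrative only
def example_get_pbs_queue_status():
    import subprocess
    output = subprocess.check_output(['qstat'])
    status = {}
    for line in output.splitlines()[2:]:  # skip the two header lines
        fields = line.split()
        if len(fields) >= 5:
            job_id = fields[0].split('.')[0]  # "1234.server" -> "1234"
            status[job_id] = fields[4]  # the state ("S") column
    return status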
def run_rosetta_relax( pdb_filename , extra_options = {} , run = True , parallel = ROSETTA_RELAX_PARALLEL ):
    # note: str.rstrip strips a *character set*, not a suffix
    root_filename = pdb_filename.replace( '.pdb' , '' )
    
    # collect the options, set the input, derive the output filenames
    relax_options = {}
    relax_options.update( ROSETTA_RELAX_OPTIONS )
    relax_options.update( extra_options )
    relax_options['s'] = pdb_filename
    relax_options['native'] = pdb_filename    # required to get gdtmm scores
    for i in relax_options.keys():
        if '__call__' in dir( relax_options[i] ):
            relax_options[i] = relax_options[i]( root_filename )

    # ...weird Rosetta append behavior...
#    if os.path.isfile( relax_options['out:file:silent'] ):
#        os.remove( relax_options['out:file:silent'] )
#    if os.path.isfile( relax_options['out:file:scorefile'] ):
#        os.remove( relax_options['out:file:scorefile'] )


    # for njc parallelization
    nstruct = int( relax_options.get( 'nstruct' , '0' ) )
    parallel = int( parallel )
    tmp_file = None
    if nstruct > 1 and parallel > 1:
        relax_options['nstruct'] = 1 #TODO: Add chunking option?
        score_filename = relax_options['out:file:scorefile']
        silent_filename = relax_options['out:file:silent']

        if 'run:jran' in relax_options:
            restoreJran = True
            jran = int( relax_options['run:jran'] )
        else:
            restoreJran = False
            jran = 123

        tmp_file = tempfile.NamedTemporaryFile( delete = False )
        print 'Parallel relax commands are in ' + tmp_file.name

        for s in xrange( nstruct ):
            tag = '_%05d' % s
            relax_options['run:jran'] = jran*nstruct + s
            relax_options['out:file:scorefile'] = score_filename + tag
            relax_options['out:file:silent'] = silent_filename + tag
            print >>tmp_file , create_executable_str( PATH_TO_ROSETTA_RELAX , args = [] , options = relax_options ) + " > %s 2>&1; echo '[[VIPURLOG]]' %s %d" % ((silent_filename + tag).replace( 'silent_' , 'log_' ) , pdb_filename , s + 1 )

        tmp_file.close()
        # the "find ... | xargs ..." idiom is used just in case nstruct is ever a *very* large number.
        command = '''\
parallel -j %d -a %s
find . -name '%s_[0-9]*[0-9]' | xargs cat | awk 'NR == 1 || $2 != "score" {print $0}' > %s
find . -name '%s_[0-9]*[0-9]' | xargs rm
find . -name '%s_[0-9]*[0-9]' | xargs cat | awk 'NR <= 2 || !($2 == "score" || $1 == "SEQUENCE:") {print $0}' > %s
find . -name '%s_[0-9]*[0-9]' | xargs rm
''' % (parallel , tmp_file.name , score_filename , score_filename , score_filename , silent_filename , silent_filename , silent_filename)
        print 'Parallel relax driver command:', command

        # restore option values
        relax_options['nstruct'] = str( nstruct )
        relax_options['out:file:scorefile'] = score_filename
        relax_options['out:file:silent'] = silent_filename
        if restoreJran:
            relax_options['run:jran'] = jran

        if run:
            # note: unlike the serial branch below, the parallel branch does not
            # execute here; it returns the driver command for the caller to run
            return (command , tmp_file.name , score_filename , silent_filename)

        if tmp_file:
            os.unlink( tmp_file.name )
    else:
        command = create_executable_str( PATH_TO_ROSETTA_RELAX , args = [] , options = relax_options )

    if run:
        run_local_commandline( command )
    
#    command = create_executable_str( PATH_TO_ROSETTA_RELAX , args = [] , options = relax_options )

#    run_local_commandline( command )
    
    # the only output we need
#    return relax_options['out:file:scorefile']
    return relax_options['out:file:silent']
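# in the parallel branch above, each of the  nstruct  single-structure runs
# gets its own output tag and random seed; a minimal illustration of the
# scheme ( jran*nstruct + s  yields distinct, reproducible seeds per structure)
def example_parallel_relax_seeds( jran = 123 , nstruct = 4 ):
    # e.g. [('_00000' , 492) , ('_00001' , 493) , ('_00002' , 494) , ('_00003' , 495)]
    return [('_%05d' % s , jran*nstruct + s) for s in range( nstruct )]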
def run_VIPUR_PBS( pdb_filename = '' , variants_filename = '' ,
        out_path = '' , write_numbering_map = True ,
        single_relax = True , delete_intermediate_relax_files = True ,
        demo = False , rerun_preprocessing = False ):
    # for the example input
    if demo:
        pdb_filename = PATH_TO_VIPUR + '/example_input/2C35.pdb'
        variants_filename = PATH_TO_VIPUR + '/example_input/2C35.txt'

        out_path = PATH_TO_VIPUR + '/example_output'

    # alternatively, run on an entire directory
    if not pdb_filename and not variants_filename:
        # current directory
        print 'no input provided, assuming you want to run on every (.pdb,.txt) file pair found in the current directory'
        pdb_filename = os.getcwd()

    if os.path.isdir( pdb_filename ) and not variants_filename:
        # assume variants_filename from pdb_filename
        variants_filename = '.txt'

    if os.path.isdir( pdb_filename ) and variants_filename[0] == '.':
        # look for file extension
        # instead, run on the directory
        if not out_path:
            out_path = os.path.abspath( pdb_filename )
#            print out_path
        
        fa_filenames = [(out_path +'/')*bool( out_path ) + i for i in os.listdir( pdb_filename ) if get_file_extension( i ) == 'fa']
        fa_filenames = [[i , get_root_filename( i ) + variants_filename] for i in fa_filenames if os.path.isfile( get_root_filename( i ) + variants_filename ) and not os.path.isfile( get_root_filename( i ) + '.pdb' )]

        print 'running VIPUR on all (.pdb,' + variants_filename + ') file pairs found in ' + pdb_filename
        # find .pdb files
        pdb_filenames = [(out_path +'/')*bool( out_path ) + i for i in os.listdir( pdb_filename ) if get_file_extension( i ) == 'pdb']

        # look for pairs
        pdb_filenames = [[i , get_root_filename( i ) + variants_filename] for i in pdb_filenames if os.path.isfile( get_root_filename( i ) + variants_filename )]
#        print [i for i in pdb_filenames if os.path.isfile( pdb_filename +'/'+ get_root_filename( i ) + variants_filename )]

        print str( len( pdb_filenames ) ) + ' pairs found'
        print str( len( fa_filenames ) ) + ' pairs found (for sequence only)'

        # go there...
#        os.chdir( pdb_filename )

        if not pdb_filenames:
            if not fa_filenames:
                raise IOError( '!!! no (.pdb,' + variants_filename + ') file pairs found in ' + pdb_filename + '!!?!\nAND no (.fa,' + variants_filename + ') file pairs were found...' )
            else:
                print '...only (.fa,' + variants_filename + ') file pairs were found, running in sequence only mode'

    else:
        # file extension etc.
        file_extension = get_file_extension( pdb_filename )
        root_filename = get_root_filename( pdb_filename )

        # normal execution, generalize by turning into list
        pdb_filenames = []
        fa_filenames = []
        if file_extension == 'pdb':
            pdb_filenames = [[(out_path +'/')*bool( out_path ) + pdb_filename , (out_path +'/')*bool( out_path ) + variants_filename]]
        else:
            # assume sequence-only input (e.g. a .fa file); an empty pair here would crash below when i[0] is accessed
            fa_filenames = [[(out_path +'/')*bool( out_path ) + pdb_filename , (out_path +'/')*bool( out_path ) + variants_filename]]


    # combine all "filenames" to run into unified framework
    target_proteins = []#None]*(len( pdb_filenames ) + len( fa_filenames ))
    for i in pdb_filenames:
        this_out_path = get_root_filename( i[0] ) +'_VIPUR'    # directory to create
        target_proteins.append( i + [False , this_out_path] )
    for i in fa_filenames:
        this_out_path = get_root_filename( i[0] ) +'_VIPUR'    # directory to create
        target_proteins.append( i + [True , this_out_path] )


    # pre processing
    task_summaries = []
    for i in target_proteins:
        # guess what the task summary filename 'would' be, if it exists, keep going...
        task_summary_filename = i[3]*bool( i[3] ) +'/'+ get_root_filename( i[0] ).split( '/' )[-1] + '.task_summary'
        if os.path.isfile( task_summary_filename ) and not rerun_preprocessing:
            print 'hmmm, ' + i[0] + ' seems to have run preprocessing already, skipping now'
            #continue    # skip this one, do not add to list of tasks...?
            # actually, skip running pre-processing BUT DO add it to the list of tasks
        else:
            task_summary_filename = run_preprocessing( i[0] , i[1] ,
                sequence_only = i[2] , out_path = i[3] ,
                task_summary_filename = task_summary_filename ,
                write_numbering_map = write_numbering_map , single_relax = single_relax )


        # modify for PBS script
        task_summary = load_task_summary( task_summary_filename )
        for j in xrange( len( task_summary['commands'] ) ):
            pbs_options = {}

            command = task_summary['commands'][j]['command']

            # add for relax            
            if task_summary['commands'][j]['feature'].replace( '_native' , '' ) == 'relax' and not 'rescore' in task_summary['commands'][j]['feature']:
                command = command.replace( '.linuxgccrelease' , '.mpi.linuxgccrelease' )
                command = 'module load mvapich2/gnu/1.8.1; /share/apps/mvapich2/1.8.1/gnu/bin/mpiexec -n 36 ' + command
                command += ' -jd2:mpi_file_buf_job_distributor false'
                command += ' -run:multiple_processes_writing_to_one_directory'
                
                # also use the parallel options
                pbs_options.update( PBS_PARALLEL_JOB_OPTIONS )
            else:
                pbs_options.update( PBS_SERIAL_JOB_OPTIONS )

            # put "cd" in front
            command = ('cd '+ i[3] +';')*bool( i[3] ) + command
            
            # modify the task summary
            task_summary['commands'][j]['command'] = command
            
            
            # actually write the script...
            # don't worry about optional #PBS header info
            script_filename = i[3] + '/'*bool( i[3] ) + get_root_filename( i[0] ).split( '/' )[-1] +'.'+ task_summary['commands'][j]['feature'] + '.pbs_script.sh'
            task_summary['commands'][j]['script_filename'] = script_filename
            
            f = open( script_filename , 'w' )
            f.write( command )
            f.close()
            
            # use the script filename as the source for any log files
            # control the output and error paths
            # note: use a fresh loop variable, not "i", which indexes target_proteins
            for k in pbs_options.keys():
                if '__call__' in dir( pbs_options[k] ):
                    pbs_options[k] = pbs_options[k]( script_filename )
            
            # also generate the pbs call? might as well, keep it simple...
            task_summary['commands'][j]['qsub_command'] = create_executable_str( 'qsub' , [script_filename] , pbs_options )

        # rewrite the task summary
        write_task_summary( task_summary , task_summary_filename )

        task_summaries.append( task_summary_filename )


    # run them all
#    run_VIPUR_task_summaries_serially( task_summaries , single_relax = single_relax , delete_intermediate_relax_files = delete_intermediate_relax_files )
    run_VIPUR_task_summaries_PBS( task_summaries , single_relax = single_relax , delete_intermediate_relax_files = delete_intermediate_relax_files )


    # post processing
    # this looks identical!!! :)
    for i in xrange( len( task_summaries ) ):
        # always okay to rerun post processing...should not make any difference
        sequence_only = target_proteins[i][2]
        task_summaries[i] = run_postprocessing( task_summaries[i] , sequence_only = sequence_only )

    return task_summaries
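# get_root_filename and get_file_extension (defined elsewhere) are assumed to
# split a filename on its last '.'; a hypothetical sketch of their behavior
def example_get_file_extension( filename ):
    return filename.split( '.' )[-1]    # 'dir/2C35.pdb' -> 'pdb'

def example_get_root_filename( filename ):
    return filename[:filename.rfind( '.' )]    # 'dir/2C35.pdb' -> 'dir/2C35'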
def run_VIPUR_SLURM( pdb_filename = '' , variants_filename = '' ,
        out_path = '' , write_numbering_map = True ,
        single_relax = False , delete_intermediate_relax_files = True ,
        demo = False , rerun_preprocessing = False ):
    # the following should probably be a separate method...

    # for the example input
    if demo:
        pdb_filename = PATH_TO_VIPUR + '/example_input/2C35.pdb'
        variants_filename = PATH_TO_VIPUR + '/example_input/2C35.txt'

        out_path = PATH_TO_VIPUR + '/example_output'

    # alternatively, run on an entire directory
    if not pdb_filename and not variants_filename:
        # current directory
        print 'no input provided, assuming you want to run on every (.pdb,.txt) file pair found in the current directory'
        pdb_filename = os.getcwd()

    if os.path.isdir( pdb_filename ) and not variants_filename:
        # assume variants_filename from pdb_filename
        variants_filename = '.txt'

    if os.path.isdir( pdb_filename ) and variants_filename[0] == '.':
        # look for file extension
        # instead, run on the directory
        if not out_path:
            out_path = os.path.abspath( pdb_filename )
#            print out_path
        
        fa_filenames = [(out_path +'/')*bool( out_path ) + i for i in os.listdir( pdb_filename ) if get_file_extension( i ) == 'fa']
        fa_filenames = [[i , get_root_filename( i ) + variants_filename] for i in fa_filenames if os.path.isfile( get_root_filename( i ) + variants_filename ) and not os.path.isfile( get_root_filename( i ) + '.pdb' )]

        print 'running VIPUR on all (.pdb,' + variants_filename + ') file pairs found in ' + pdb_filename
        # find .pdb files
        pdb_filenames = [(out_path +'/')*bool( out_path ) + i for i in os.listdir( pdb_filename ) if get_file_extension( i ) == 'pdb']

        # look for pairs
        pdb_filenames = [[i , get_root_filename( i ) + variants_filename] for i in pdb_filenames if os.path.isfile( get_root_filename( i ) + variants_filename )]
#        print [i for i in pdb_filenames if os.path.isfile( pdb_filename +'/'+ get_root_filename( i ) + variants_filename )]

        print str( len( pdb_filenames ) ) + ' pairs found'
        print str( len( fa_filenames ) ) + ' pairs found for sequence only mode'

        # go there...
#        os.chdir( pdb_filename )

        if not pdb_filenames:
            if not fa_filenames:
                raise IOError( '!!! no (.pdb,' + variants_filename + ') file pairs found in ' + pdb_filename + '!!?!\nAND no (.fa,' + variants_filename + ') file pairs were found...' )
            else:
                print '...only (.fa,' + variants_filename + ') file pairs were found, running in sequence only mode'

    else:
        # file extension etc.
        file_extension = get_file_extension( pdb_filename )
        root_filename = get_root_filename( pdb_filename )

        # normal execution, generalize by turning into list
        pdb_filenames = []
        fa_filenames = []
        if file_extension == 'pdb':
            pdb_filenames = [[(out_path +'/')*bool( out_path ) + pdb_filename , (out_path +'/')*bool( out_path ) + variants_filename]]
        else:
            # assume sequence-only input (e.g. a .fa file); an empty pair here would crash below when i[0] is accessed
            fa_filenames = [[(out_path +'/')*bool( out_path ) + pdb_filename , (out_path +'/')*bool( out_path ) + variants_filename]]


    # combine all "filenames" to run into unified framework
    target_proteins = []#None]*(len( pdb_filenames ) + len( fa_filenames ))
    for i in pdb_filenames:
        this_out_path = get_root_filename( i[0] ) +'_VIPUR'    # directory to create
        target_proteins.append( i + [False , this_out_path] )
    for i in fa_filenames:
        this_out_path = get_root_filename( i[0] ) +'_VIPUR'    # directory to create
        target_proteins.append( i + [True , this_out_path] )

    # setup environment variables BEFORE pre processing
    # not needed with current SLURM setup...

    # pre processing
    task_summaries = []
    for i in target_proteins:
        # guess what the task summary filename 'would' be, if it exists, keep going...
        task_summary_filename = i[3]*bool( i[3] ) +'/'+ get_root_filename( i[0] ).split( '/' )[-1] + '.task_summary'
        if os.path.isfile( task_summary_filename ) and not rerun_preprocessing:
            print 'hmmm, ' + i[0] + ' seems to have run preprocessing already, skipping now'
            #continue    # skip this one, do not add to list of tasks...?
            # actually, skip running pre-processing BUT DO add it to the list of tasks
            
            # is this actually working?
        else:
            task_summary_filename = run_preprocessing( i[0] , i[1] ,
                sequence_only = i[2] , out_path = i[3] ,
                task_summary_filename = task_summary_filename ,
                write_numbering_map = write_numbering_map , single_relax = single_relax )


        # modify for SLURM script
        task_summary = load_task_summary( task_summary_filename )
        
#        task_summary['filenames']['slurm_script_filename'] = 'slurm_' + get_root_filename( i[0] ) + '.sh'
        task_summary['filenames']['slurm_script_filename'] = out_path + '/slurm_script_this_batch.sh'
        task_summary['filenames']['slurm_output_filename'] = out_path + '/slurm_output_batch.out'
        task_summary['filenames']['slurm_error_filename'] = out_path + '/slurm_error_batch.err'
        # ...awkward...they all have individual task summarization of the same script...but nowhere else to put it...
        
        for j in xrange( len( task_summary['commands'] ) ):
            slurm_options = {}

            command = task_summary['commands'][j]['command']

            # add for relax
            if task_summary['commands'][j]['feature'].replace( '_native' , '' ) == 'relax' and not 'rescore' in task_summary['commands'][j]['feature']:
                command = command.replace( '.linuxgccrelease' , '.mpi.linuxgccrelease' )
#                command = 'module load mvapich2/gnu/1.8.1;/share/apps/mvapich2/1.8.1/gnu/bin/mpiexec -n 36 ' + command
                command = 'mpiexec -n 40 ' + command
                command += ' -jd2:mpi_file_buf_job_distributor false'
                command += ' -run:multiple_processes_writing_to_one_directory'
                
                # also use the parallel options
#                pbs_options.update( PBS_PARALLEL_JOB_OPTIONS )
#            else:

            slurm_options.update( SLURM_JOB_OPTIONS )

            # put "cd" in front
#            command = ('#!/bin/bash\n\ncd '+ i[3] +'\n\n')*bool( i[3] ) + command +'\n\n'
#            command = ('cd '+ i[3] +';')*bool( i[3] ) + command    # not needed for slurm, use abspaths and one big batch

            # special...
            # note: modify the local "command" BEFORE it is written back, so the
            # num_threads flag is not clobbered by the assignment below
            if task_summary['commands'][j]['feature'] == 'psiblast' and not 'num_threads' in command:
                command += ' -num_threads 40'

            # modify the task summary
            task_summary['commands'][j]['command'] = command
            
            # MUST still do ddg_monomer on single process...
            if 'ddg_monomer' in task_summary['commands'][j]['feature'] or 'rescore' in task_summary['commands'][j]['feature']:
                if 'rescore' in task_summary['commands'][j]['feature']:
                    # sanity check
                    if not 'variant' in task_summary['commands'][j].keys():
                        raise Exception( 'rescore command without the variant information...!?' )
                    
                    # need variant in the script, otherwise overwrite :(
                    script_filename = i[3] + '/'*bool( i[3] ) + get_root_filename( i[0] ).split( '/' )[-1] +'.'+ task_summary['commands'][j]['feature'] +'_'+ task_summary['commands'][j]['variant'] + '.slurm_script.sh'
                else:
                    # ddg monomer is "per protein", no need for more detail
                    script_filename = i[3] + '/'*bool( i[3] ) + get_root_filename( i[0] ).split( '/' )[-1] +'.'+ task_summary['commands'][j]['feature'] + '.slurm_script.sh'
                task_summary['commands'][j]['script_filename'] = script_filename

                # only write ONE submission script per batch = run of VIPUR           
                f = open( script_filename , 'w' )
                f.write( SLURM_BASH_SCRIPT( command ) )
                f.close()
            
                # use the script filename as the source for any log files
                # control the output and error paths
                for k in slurm_options.keys():
                    if '__call__' in dir( slurm_options[k] ):
                        slurm_options[k] = slurm_options[k]( script_filename )

                slurm_options['N'] = '1'
                slurm_options['n'] = '1'

                # also generate the pbs call? might as well, keep it simple...
                # srun or sbatch?
                task_summary['commands'][j]['sbatch_command'] = create_executable_str( 'sbatch' , [script_filename] , slurm_options )
            
            
            # actually write the script...
            # don't worry about optional #PBS header info
#            script_filename = i[3] + '/'*bool( i[3] ) + get_root_filename( i[0] ).split( '/' )[-1] +'.'+ task_summary['commands'][j]['feature'] + '.slurm_script.sh'
#            task_summary['commands'][j]['script_filename'] = script_filename

            # only write ONE submission script per batch = run of VIPUR           
#            f = open( script_filename , 'w' )
#            f.write( SLURM_BASH_SCRIPT( command ) )
#            f.close()
            
            # use the script filename as the source for any log files
            # control the output and error paths
#            for k in slurm_options.keys():
#                if '__call__' in dir( slurm_options[k] ):
#                    slurm_options[k] = slurm_options[k]( script_filename )

            # also generate the pbs call? might as well, keep it simple...
            # srun or sbatch?
#            task_summary['commands'][j]['srun_command'] = create_executable_str( 'srun' , [script_filename] , slurm_options )

        # rewrite the task summary
        write_task_summary( task_summary , task_summary_filename )

        task_summaries.append( task_summary )    # the summary dict itself, not its filename


    # run them all
#    run_VIPUR_task_summaries_serially( task_summaries , single_relax = single_relax , delete_intermediate_relax_files = delete_intermediate_relax_files )
    run_VIPUR_task_summaries_SLURM( task_summaries , single_relax = single_relax , delete_intermediate_relax_files = delete_intermediate_relax_files )


    # post processing
    # this looks identical!!! :)
    for i in xrange( len( task_summaries ) ):
        # always okay to rerun post processing...should not make any difference
        sequence_only = target_proteins[i][2]
        task_summaries[i] = run_postprocessing( task_summaries[i] , sequence_only = sequence_only )

    return task_summaries
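# SLURM_BASH_SCRIPT (defined elsewhere) is assumed to wrap a command in a
# minimal bash submission script; a hypothetical stand-in
def example_slurm_bash_script( command ):
    return '#!/bin/bash\n\n' + command + '\n'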
Exemplo n.º 15
0
def run_VIPUR_tasks_PBS( task_summaries , task_list , max_pbs_tries = 2 , ddg_monomer_cleanup = True , single_relax = True , delete_intermediate_relax_files = False ):
    # run the non_rescore tasks
    completed = [i for i in task_list if 'run' in task_summaries[i[0]]['commands'][i[1]] and 'success' in task_summaries[i[0]]['commands'][i[1]]['run']]
    # should running_or_queued be saved? written to file?
    running_or_queued = {}
    rounds = 0
    all_completed_jobs = []    # prevents annoying bulk output, only see it the first time it completes
    while not len( completed ) == len( task_list ):
        rounds += 1
        print '\n\nQUEUE MONITOR ROUND ' + str( rounds )
        
        # debug
#        print running_or_queued
    
        # check queue status
        queue_status = get_pbs_queue_status()

        # update "running_or_queued" list (?)
        # err, no, does not have information on which job it is...:(
        #for i in queue_status.keys():
        #    if queue_status[i] in ['R' , 'Q']:

        queue_space_occupied = len( [i for i in queue_status.values() if not i in ['C' , 'R']] )    # ignore "C"ompleted jobs, "R"unning job quota are not set by us...
        # if your queue system does not have a separate "R"un quota, remove 'R' from the above!
        available_space = PBS_QUEUE_QUOTA - queue_space_occupied

        
        # launch next jobs in available slots
        if available_space:
            print str( queue_space_occupied ) + ' jobs queued or running, could submit up to ' + str( available_space ) + ' more'
            # choose the next job
            jobs_to_run = [i for i in task_list if
                not i in completed and
                not i in running_or_queued.values() and
                not ( 'run' in task_summaries[i[0]]['commands'][i[1]] and
                    ('success' in task_summaries[i[0]]['commands'][i[1]]['run'] or
                    'failure' in task_summaries[i[0]]['commands'][i[1]]['run']) )
                ]
            print str( len( jobs_to_run ) ) + ' jobs still need to finish (after the currently running jobs complete)'
            
            # only the next few
            for i in jobs_to_run[:available_space]:
                command_dict = task_summaries[i[0]]['commands'][i[1]]
            
                # write scripts as part of pre processing?...yeah...
                # write the command to a script
                #script_filename = command_dict['out_path'] +'/'*bool( command_dict['out_path'] )+
                #script_filename = command_dict['script_filename']
            
            
                # if its a rescore and relax jobs were separated, need to recombine them!
                if 'rescore' in command_dict['feature']:
                    # combine the individual relax runs
                    #relax_commands = [i for i in task_summary['commands'] if i['feature'].replace( '_native' , '' ) == 'relax']
                    #silent_filenames = [j['output_filename'] for j in relax_commands if j['variant'] == i['variant'] and 'run' in j.keys() and j['run'] == 'success']
                    silent_filenames = [j['output_filename'] for j in task_summaries[i[0]]['commands'] if
                        j['feature'].replace( '_native' , '' ) == 'relax' and
                        j['variant'] == command_dict['variant'] and
                        'run' in j.keys() and
                        'success' in j['run']
                        ]
                    # actually need to identify the combined_silent_filename, be sure the relax files have not already been merged
                    # which variant
                    target_variant = [j for j in task_summaries[i[0]]['variants'].keys() if j.split( '_' )[-1] == command_dict['variant'] and j.split( '_' )[0] in command_dict['command']]
                    if not target_variant:
                        # its native
                        combined_silent_filename = task_summaries[i[0]]['other']['combined_native_silent_filename']
                        combined_score_filename = task_summaries[i[0]]['other']['combined_native_score_filename']
                    elif len( target_variant ) > 1:
                        raise Exception( '??? found more than on matching variant ???\n' + ', '.join( target_variant ) )
                    else:
                        # found it
                        combined_silent_filename = task_summaries[i[0]]['variants'][target_variant[0]]['combined_silent_filename']
                        combined_score_filename = task_summaries[i[0]]['variants'][target_variant[0]]['combined_score_filename']

                    #if not single_relax:    # AND post processing has not already be run...scan for the combined silent file
                    if not single_relax and not os.path.isfile( combined_silent_filename ):
                        if not len( silent_filenames ) == ROSETTA_RELAX_OPTIONS['nstruct']:
                            raise Exception( '??? somehow the matching relax run(s) has failed ???\n' + str( i ) )
                        score_filenames = [j.replace( '.silent' , '.sc' ) for j in silent_filenames]

                        merge_rosetta_relax_output( silent_filenames , combined_silent_filename , score_filenames , combined_score_filename , delete_old_files = delete_intermediate_relax_files )
                        # rescore already knows the proper filename
                    else:
                        # just a single match for each
                        # output filename should be correct as is :)
                        None

            
                # submit this script using a queue command
                # generate it here instead
                pbs_options = {}
                if command_dict['queue'] == 'parallel':
                    pbs_options.update( PBS_PARALLEL_JOB_OPTIONS )
                elif command_dict['queue'] == 'serial':
                    pbs_options.update( PBS_SERIAL_JOB_OPTIONS )
                # resolve any callable option values (they derive filenames from the script name)
                script_filename = command_dict['script_filename']
                for k in pbs_options.keys():
                    if '__call__' in dir( pbs_options[k] ):
                        pbs_options[k] = pbs_options[k]( script_filename )

                pbs_command = create_executable_str( 'qsub' , [script_filename] , pbs_options )
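                # pbs_command looks something like "qsub <flags> <script>"; the
                # exact flags depend on the PBS_*_JOB_OPTIONS chosen above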
                new_job_id = run_local_commandline( pbs_command , collect_stdout = True )
                new_job_id = new_job_id.strip()
                if '.' in new_job_id:
                    new_job_id = new_job_id[:new_job_id.find( '.' )]
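                # qsub typically prints an id like "12345.server"; keep only "12345"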
                print 'submitted ' + new_job_id
                
                # save the job id
                # assume it's queued
                running_or_queued[new_job_id] = i

        else:
            print 'no new jobs are available to submit'

        # report how many jobs are still running (for monitoring)
        running_jobs = len( [i for i in queue_status.values() if i == 'R'] )
        if running_jobs:
            print str( running_jobs ) + ' jobs are still running...(not counting those just submitted, but including your other jobs)'
        
        # assess outcome of completed jobs
        for job_id in sorted( queue_status.keys() ):    # job ids are strings: this sort is lexicographic, not numeric
            # debug
            if not job_id in all_completed_jobs:
                print '\t'+ job_id , queue_status[job_id]
        
            if queue_status[job_id] == 'C' and job_id in running_or_queued.keys():
                task_id = running_or_queued[job_id][0]
                command_index = running_or_queued[job_id][1]
                command_dict = task_summaries[task_id]['commands'][command_index]

                check_successful = determine_check_successful_function( command_dict , single_relax = single_relax )
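                # (assumption: this selects the success-check appropriate to the
                # command's feature; calling it yields a bool or a (bool, details...) tuple)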

                success = check_successful( command_dict )

                failure_summary = ''
                if isinstance( success , bool ):
                    complete = success
                elif len( success ) > 1 and isinstance( success[0] , bool ):
                    complete = success[0]
                    failure_summary += ' '+ ';'.join( [str( j ) for j in success[1:]] ) +' '
                    print complete , failure_summary , 'try again?'*bool( not complete )    # debug
                else:
                    # fallback: treat any other return value as a simple pass/fail
                    complete = bool( success )

                # track the number of attempts; the command is retried until it
                # succeeds or max_pbs_tries is exhausted
                tries = 0
                if 'run' in command_dict.keys() and command_dict['run'] and not 'success' in command_dict['run'] and not 'failure' in command_dict['run']:
                    tries = int( command_dict['run'] )
                tries += 1
                print tries , 'attempts so far'    # debug
                
                if tries >= max_pbs_tries:
                    # it's a permanent failure: out of tries
                    print job_id + ' completed successfully'*complete + (' failed with ' + str( tries ) + ' attempts')*(not complete)
                    failure_summary = 'success'*complete + (str( tries ) +' tries;failure ' + failure_summary)*(not complete)
                elif complete:
                    print job_id + ' completed successfully'
                    failure_summary = 'success' #+ str( tries ) + ' tries'
                else:
                    # record the number of tries
                    print job_id + ' completed' + ' successfully'*complete
                    failure_summary = str( tries )
                
                # update the record
                print 'updating with: ' + failure_summary    # debug
                task_summaries[task_id]['commands'][command_index]['run'] = failure_summary
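                # 'run' now holds 'success', a '<n> tries;failure ...' record, or
                # the bare retry count for another attempt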
            
                # optionally cleanup
                if ddg_monomer_cleanup and command_dict['feature'] == 'ddg_monomer':
                    print 'ddg_monomer writes useless output files, deleting these now...'
                    remove_intermediate_ddg_monomer_files()

                # mark the job complete and stop tracking it
                completed.append( running_or_queued[job_id] )
                del running_or_queued[job_id]

            if queue_status[job_id] == 'C' and not job_id in all_completed_jobs:
                all_completed_jobs.append( job_id )    # prevent redundant update info


        # update the task summaries on disk so progress survives interruption
        # (each summary records its own filename)
        for i in task_summaries:
            if not 'task_summary_filename' in i['filenames'].keys():
                raise NotImplementedError( 'the task summary must record its own filename in \'filenames\'...' )
            else:
                # write it out
                print 'updating: ' + i['filenames']['task_summary_filename']
                write_task_summary( i , i['filenames']['task_summary_filename'] )

        
        # pause before polling the queue again
        print '\n' , len( completed ) , 'of' , len( task_list ) , 'jobs completed'    # debug
        if len( completed ) < len( task_list ):    # skip the final wait once everything is done
            print 'waiting ' + str( PBS_QUEUE_MONITOR_DELAY ) +'s...'
            time.sleep( PBS_QUEUE_MONITOR_DELAY )


    # write the task summaries one final time before returning
    for i in task_summaries:
        if not 'task_summary_filename' in i['filenames'].keys():
            raise NotImplementedError( 'the task summary must record its own filename in \'filenames\'...' )
        else:
            # write it out
            write_task_summary( i , i['filenames']['task_summary_filename'] )
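
# the monitor loop above consumes a  queue_status  dict mapping PBS job ids to
# single-letter states ('Q', 'R', 'C', ...); a minimal sketch of building such
# a dict from "qstat" output is below (a hypothetical helper, not part of the
# original module: qstat column layouts vary by site, so the field indices
# here are assumptions)
def get_pbs_queue_status_sketch():
    """
    Parse "qstat" stdout into {job_id: state}, assuming the default layout:
    two header lines, then one job per line with the state in the 5th column
    """
    raw = run_local_commandline( 'qstat' , collect_stdout = True )
    status = {}
    for line in raw.strip().split( '\n' )[2:]:    # skip the two header lines
        fields = line.split()
        if len( fields ) >= 5:
            job_id = fields[0].split( '.' )[0]    # trim the server suffix
            status[job_id] = fields[4]            # the single-letter job state
    return status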
def run_rosetta_relax(pdb_filename,
                      extra_options={},
                      run=True,
                      parallel=ROSETTA_RELAX_PARALLEL):
    root_filename = pdb_filename.replace('.pdb', '')  # rstrip strips characters, not a suffix

    # collect the options, set the input, derive the output filenames
    relax_options = {}
    relax_options.update(ROSETTA_RELAX_OPTIONS)
    relax_options.update(extra_options)
    relax_options['s'] = pdb_filename
    relax_options['native'] = pdb_filename  # required to get gdtmm scores
    for i in relax_options.keys():
        if '__call__' in dir(relax_options[i]):
            relax_options[i] = relax_options[i](root_filename)

    # ...weird Rosetta append behavior...
#    if os.path.isfile( relax_options['out:file:silent'] ):
#        os.remove( relax_options['out:file:silent'] )
#    if os.path.isfile( relax_options['out:file:scorefile'] ):
#        os.remove( relax_options['out:file:scorefile'] )

    # njc parallelization: fan nstruct out as independent single-structure runs
    nstruct = int(relax_options.get('nstruct', '0'))
    parallel = int(parallel)
    tmp_file = None
    if nstruct > 1 and parallel > 1:
        relax_options['nstruct'] = 1  #TODO: Add chunking option?
        score_filename = relax_options['out:file:scorefile']
        silent_filename = relax_options['out:file:silent']

        if 'run:jran' in relax_options:
            restoreJran = True
            jran = int(relax_options['run:jran'])
        else:
            restoreJran = False
            jran = 123

        tmp_file = tempfile.NamedTemporaryFile(delete=False)
        print 'Parallel relax commands are in ' + tmp_file.name

        for s in xrange(nstruct):
            tag = '_%05d' % s
            relax_options['run:jran'] = jran * nstruct + s
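            # the tag (e.g. '_00003') keeps each structure's outputs separate,
            # and the jran offset gives every decoy a distinct, reproducible seed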
            relax_options['out:file:scorefile'] = score_filename + tag
            relax_options['out:file:silent'] = silent_filename + tag
            log_filename = (silent_filename + tag).replace('silent_', 'log_')
            print >> tmp_file, create_executable_str(
                PATH_TO_ROSETTA_RELAX, args=[], options=relax_options
            ) + " > %s 2>&1; echo '[[VIPURLOG]]' %s %d" % (
                log_filename, pdb_filename, s + 1)

        tmp_file.close()
        # the "find ... | xargs ..." idiom is used just in case nstruct is ever a *very* large number.
        command = '''\
parallel -j %d -a %s
find . -name '%s_[0-9]*[0-9]' | xargs cat | awk 'NR == 1 || $2 != "score" {print $0}' > %s
find . -name '%s_[0-9]*[0-9]' | xargs rm
find . -name '%s_[0-9]*[0-9]' | xargs cat | awk 'NR <= 2 || !($2 == "score" || $1 == "SEQUENCE:") {print $0}' > %s
find . -name '%s_[0-9]*[0-9]' | xargs rm
''' % (parallel, tmp_file.name, score_filename, score_filename, score_filename,
        silent_filename, silent_filename, silent_filename)
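        # the driver does three things: run every per-structure command through
        # GNU parallel, concatenate the tagged score files (keeping the header
        # line once) and the tagged silent files (keeping the SEQUENCE/score
        # header once), then remove the per-structure pieces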
        print 'Parallel relax driver command:', command

        # restore option values
        relax_options['nstruct'] = str(nstruct)
        relax_options['out:file:scorefile'] = score_filename
        relax_options['out:file:silent'] = silent_filename
        if restoreJran:
            relax_options['run:jran'] = jran

        if not run:
            # follow the module convention: hand the command back without running it
            return (command, tmp_file.name, score_filename, silent_filename)

        run_local_commandline(command)
        # the per-structure command list is no longer needed once the driver ran
        os.unlink(tmp_file.name)
        return relax_options['out:file:silent']
    else:
        command = create_executable_str(PATH_TO_ROSETTA_RELAX,
                                        args=[],
                                        options=relax_options)

    if run:
        run_local_commandline(command)

    # the only output we need
    return relax_options['out:file:silent']
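
# a minimal usage sketch (the filenames here are hypothetical):
#
#     # serial: runs relax and returns the silent file path
#     silent_file = run_rosetta_relax( 'example.pdb' )
#
#     # parallel without running: returns the driver command and its filenames
#     cmd , cmd_list_file , score_file , silent_file = run_rosetta_relax(
#         'example.pdb' , extra_options = {'nstruct' : '10'} , run = False , parallel = 4 )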