def get_benchmark_list_by_name(database_name):
    """Return the distinct benchmark names in the given database.

    Names come back ordered by descending benchmark id, i.e. the most
    recently created benchmark appears first.
    """
    sql = 'SELECT DISTINCT name from benchmarks ORDER BY benchmark_id DESC'
    with database.connect(db_name=database_name) as session:
        return [row.name for row in session.execute(sql)]
def resume_benchmark(benchmark_id, nstruct=None): qsub_command = 'qsub', benchmark_command = 'loop_benchmark.py', benchmark_id # You get weird errors if you forget to cast nstruct from string to int. if nstruct is not None: nstruct = int(nstruct) # Read the job parameters from the database. with database.connect() as session: benchmark = session.query(database.Benchmarks).get(benchmark_id) num_pdbs = len(benchmark.input_pdbs) # Make sure the right version of rosetta is being used. git_commit = subprocess.check_output( shlex.split('git rev-parse HEAD'), cwd=settings.rosetta).strip() git_diff = subprocess.check_output( shlex.split('git diff'), cwd=settings.rosetta).strip() if benchmark.git_commit != git_commit: message = "Benchmark \"{0}\" was run with rosetta commit #{1}, but commit #{2} is currently checked out. Press [Ctrl-C] to abort or [Enter] to continue." message = textwrap.fill(message.format(benchmark.id, benchmark.git_commit[:8], git_commit[:8])) raw_input(message) elif benchmark.git_diff != git_diff: message = "Uncommitted changes have been made to rosetta since benchmark \"{0}\" was run. Press [Ctrl-C] to abort or [Enter] to continue." message = textwrap.fill(message.format(benchmark.id)) raw_input(message) # Build the qsub command. if benchmark.fast: qsub_command += '-t', '1-{0}'.format((nstruct or 10) * num_pdbs) qsub_command += '-l', 'h_rt=0:30:00' else: qsub_command += '-t', '1-{0}'.format((nstruct or 500) * num_pdbs) qsub_command += '-l', 'h_rt=4:00:00' print "Your benchmark \"{0}\" (id={1}) is being resumed".format( benchmark.name, benchmark_id) # Submit the job. utilities.clear_directory('job_output') qsub_command += '-o', 'job_output', '-e', 'job_output' subprocess.call(qsub_command + benchmark_command)
from libraries import utilities
from libraries import settings; settings.load(interactive=False)
from libraries import database

# Parse arguments.
# This script is launched as an SGE array job: the per-task index arrives in
# the SGE_TASK_ID environment variable and the benchmark id on the command
# line.
if len(sys.argv) != 2 or 'SGE_TASK_ID' not in os.environ:
    print 'Usage: SGE_TASK_ID=<id> loop_benchmark.py <benchmark_id>'
    sys.exit(1)

# SGE task ids are 1-based; convert to a 0-based index.
task_id = int(os.environ['SGE_TASK_ID']) - 1
benchmark_id = int(sys.argv[1])

# Figure out which loop to benchmark.
with database.connect() as session:
    benchmark = session.query(database.Benchmarks).get(benchmark_id)

    script_path = benchmark.rosetta_script
    # rosetta_script_vars is stored as a JSON list (may be NULL/empty).
    script_vars = json.loads(benchmark.rosetta_script_vars or '[]')
    flags_path = benchmark.rosetta_flags
    fragments_path = benchmark.rosetta_fragments
    fast = benchmark.fast

    input_pdbs = benchmark.input_pdbs
    # Task ids wrap around the input list, so tasks beyond len(input_pdbs)
    # produce additional models for the same structures.
    pdb_path = input_pdbs[task_id % len(input_pdbs)].pdb_path
    pdb_tag = os.path.splitext(os.path.basename(pdb_path))[0]
    # The loop definition file lives next to the PDB, with a .loop suffix.
    loop_path = re.sub('\.pdb(\.gz)?$', '.loop', pdb_path)
    non_random = benchmark.non_random

# Set LD_LIBRARY_PATH so that the MySQL libraries can be found.
rosetta_env = os.environ.copy()
            # (continuation of a list comprehension opened in a previous chunk)
            r.name for r in session.execute(
                'SELECT DISTINCT name from benchmarks ORDER BY benchmark_id DESC'
            )
        ]


def get_progress(database_name, benchmark_name):
    # NOTE(review): this definition continues past the end of this chunk.
    # Verify connectivity up front so a bad configuration produces a readable
    # message instead of a traceback further down.
    try:
        database.test_connect(db_name=database_name)
    except RuntimeError, error:
        print error
        sys.exit(1)

    # Create an entry in the benchmarks table.
    with database.connect(db_name=database_name) as session:
        messages = ['']

        # Use the latest benchmark's name if none was supplied
        if not benchmark_name:
            q = session.query(database.Benchmarks).order_by(
                database.Benchmarks.benchmark_id.desc())
            if q.count() == 0:
                exit(
                    'There is no benchmark data in the database "{0}".'.format(
                        database_name))
            benchmark_name = q.first().name
            messages.append(
                'No benchmark was selected. Choosing the most recent benchmark: "{0}".\n'
                .format(benchmark_name))
settings.load(interactive=False)
from libraries import database

# Parse arguments.
# This script is launched as an SGE array job: the per-task index arrives in
# the SGE_TASK_ID environment variable and the benchmark id on the command
# line.
if len(sys.argv) != 2 or 'SGE_TASK_ID' not in os.environ:
    print 'Usage: SGE_TASK_ID=<id> loop_benchmark.py <benchmark_id>'
    sys.exit(1)

# SGE task ids are 1-based; convert to a 0-based index.
task_id = int(os.environ['SGE_TASK_ID']) - 1
benchmark_id = int(sys.argv[1])

# Figure out which loop to benchmark.
with database.connect() as session:
    benchmark = session.query(database.Benchmarks).get(benchmark_id)

    script_path = benchmark.rosetta_script
    # rosetta_script_vars is stored as a JSON list (may be NULL/empty).
    script_vars = json.loads(benchmark.rosetta_script_vars or '[]')
    flags_path = benchmark.rosetta_flags
    fragments_path = benchmark.rosetta_fragments
    fast = benchmark.fast

    input_pdbs = benchmark.input_pdbs
    # Task ids wrap around the input list, so tasks beyond len(input_pdbs)
    # produce additional models for the same structures.
    pdb_path = input_pdbs[task_id % len(input_pdbs)].pdb_path
    pdb_tag = os.path.splitext(os.path.basename(pdb_path))[0]
    # The loop definition file lives next to the PDB, with a .loop suffix.
    loop_path = re.sub('\.pdb(\.gz)?$', '.loop', pdb_path)
    non_random = benchmark.non_random

# Set LD_LIBRARY_PATH so that the MySQL libraries can be found.
rosetta_env = os.environ.copy()
def from_database(name_or_id, group_by_name=False):
    """Load a benchmark, with all of its loops and models, from the database.

    name_or_id -- a benchmark name (string) or a benchmark id (integer); the
                  meaning is inferred from whether int() accepts the value.
    group_by_name -- when True, several runs sharing the same name are merged
                     into a single Benchmark instead of being an error.

    Returns a Benchmark populated with one Loop per input PDB and one Model
    per structure.  Dies with a readable error message (rather than raising)
    if the benchmark cannot be identified unambiguously.
    """
    from libraries import database
    from sqlalchemy import desc

    with database.connect() as session:
        # Decide whether a name or id was used to specify a benchmark run,
        # and load the corresponding data out of the database.  Names are
        # expected to be strings and ids are expected to be integers.  If
        # more than one benchmark has the same name, the most recent one
        # will be used.
        db_benchmarks = []
        try:
            benchmark_id = int(name_or_id)
            db_benchmark = session.query(database.Benchmarks).get(benchmark_id)
            if db_benchmark is None:
                message = "No benchmark '{}' in the database."
                utilities.print_error_and_die(message, benchmark_id)
            db_benchmarks = [db_benchmark]
        except ValueError:
            # int() failed, so treat name_or_id as a benchmark name.
            name = name_or_id
            query = session.query(database.Benchmarks).filter_by(
                name=name).order_by(desc(database.Benchmarks.start_time))
            db_benchmarks = list(query)
            if not db_benchmarks:
                # Previously an unknown name fell through to the b_name
                # assertion below and died with a bare AssertionError.
                message = "No benchmark '{}' in the database."
                utilities.print_error_and_die(message, name)
            if not group_by_name:
                if len(db_benchmarks) > 1:
                    message = "Multiple benchmarks runs were found with the same name '{0}' (ids are: {1}). If this is expected then set the --group_by_name option.".format(
                        name, ', '.join(map(str, [b.id for b in db_benchmarks])))
                    utilities.print_error_and_die(message, name)

        # All of the grouped runs must agree on a single benchmark name.
        b_name = set(db_benchmark.name for db_benchmark in db_benchmarks)
        assert (len(b_name) == 1)
        b_name = b_name.pop()

        # Titles may legitimately differ between runs; warn and prefer the
        # most recent one (db_benchmarks is sorted newest-first).
        b_title = set(
            db_benchmark.title or '' for db_benchmark in db_benchmarks)
        if len(b_title) > 1:
            colortext.warning(
                "There are multiple titles associated with benchmark {0}: '{1}'. Choosing the most recent ('{2}')."
                .format(b_name, "', '".join(b_title), db_benchmarks[0].title or ''))
            b_title = db_benchmarks[0].title
        else:
            b_title = b_title.pop() or None

        benchmark = Benchmark(b_name, b_title)

        for db_benchmark in db_benchmarks:
            # Fill in the benchmark data structure from the database.
            print(
                "Loading the {0} benchmark (id {1}) from the database...".
                format(benchmark.name, db_benchmark.id))

            for db_input in db_benchmark.input_pdbs:
                path = db_input.pdb_path
                if not benchmark.loops.get(path):
                    benchmark.loops[path] = Loop(benchmark, path)

            for structure in db_benchmark.structures:
                loop = benchmark.loops[structure.input_tag]
                # Model ids are 1-based and assigned per loop, in load order.
                model_id = len(loop.models) + 1
                score = structure.score_features.score
                rmsd = structure.rmsd_features.protein_backbone
                runtime = structure.runtime_features.elapsed_time
                model = Model(loop, model_id, score, rmsd, runtime)
                loop.models.append(model)

        return benchmark
def get_benchmark_list_by_name(database_name):
    """List the distinct benchmark names stored in *database_name*, with the
    most recently created benchmark first."""
    with database.connect(db_name=database_name) as session:
        result = session.execute(
            'SELECT DISTINCT name from benchmarks ORDER BY benchmark_id DESC')
        return [record.name for record in result]
def get_benchmark_list_by_name(database_name):
    # Return the distinct benchmark names, most recently created first.
    with database.connect(db_name = database_name) as session:
        return [r.name for r in session.execute('SELECT DISTINCT name from benchmarks ORDER BY benchmark_id DESC')]

def get_progress(database_name, benchmark_name):
    # NOTE(review): this definition continues past the end of this chunk.
    # Verify connectivity up front so a bad configuration produces a readable
    # message instead of a traceback further down.
    try:
        database.test_connect(db_name = database_name)
    except RuntimeError, error:
        print error
        sys.exit(1)

    # Create an entry in the benchmarks table.
    with database.connect(db_name = database_name) as session:
        messages = ['']

        # Use the latest benchmark's name if none was supplied
        if not benchmark_name:
            q = session.query(database.Benchmarks).order_by(database.Benchmarks.benchmark_id.desc())
            if q.count() == 0:
                exit('There is no benchmark data in the database "{0}".'.format(database_name))
            benchmark_name = q.first().name
            messages.append('No benchmark was selected. Choosing the most recent benchmark: "{0}".\n'.format(benchmark_name))

        # Retrieve the set of benchmark runs associated the benchmark name
        q = session.query(database.Benchmarks).filter(database.Benchmarks.name == benchmark_name)
        if q.count() == 0:
            exit('There is no benchmark data in the database "{0}" for benchmark "{1}".'.format(database_name, benchmark_name))
def from_database(name_or_id, group_by_name = False):
    """Load a benchmark, with all of its loops and models, from the database.

    name_or_id -- a benchmark name (string) or a benchmark id (integer); the
                  meaning is inferred from whether int() accepts the value.
    group_by_name -- when True, several runs sharing the same name are merged
                     into a single Benchmark instead of being an error.
    """
    from libraries import database
    from sqlalchemy import desc

    with database.connect() as session:
        # Decide whether a name or id was used to specify a benchmark run,
        # and load the corresponding data out of the database.  The meaning
        # of name_or_id is inferred from its type: names are expected to be
        # strings and ids are expected to be integers.  If more than one
        # benchmark has the same name, the most recent one will be used.
        db_benchmarks = []
        try:
            id = int(name_or_id)
            _db_benchmark = session.query(database.Benchmarks).get(id)
            if _db_benchmark is None:
                message = "No benchmark '{}' in the database."
                utilities.print_error_and_die(message, id)
            db_benchmarks = [_db_benchmark]
        except ValueError:
            # int() failed, so treat name_or_id as a benchmark name.
            name = name_or_id
            query = session.query(database.Benchmarks).filter_by(name=name).order_by(desc(database.Benchmarks.start_time))
            db_benchmarks = [q for q in query]
            if not group_by_name:
                if len(db_benchmarks) > 1:
                    message = "Multiple benchmarks runs were found with the same name '{0}' (ids are: {1}). If this is expected then set the --group_by_name option.".format(name, ', '.join(map(str, [b.id for b in db_benchmarks])))
                    utilities.print_error_and_die(message, name)

        # All of the grouped runs must agree on a single benchmark name.
        b_name = set([db_benchmark.name for db_benchmark in db_benchmarks])
        assert(len(b_name) == 1)
        b_name = b_name.pop()

        # Titles may legitimately differ between runs; warn and prefer the
        # most recent one (db_benchmarks is sorted newest-first).
        b_title = set([db_benchmark.title or '' for db_benchmark in db_benchmarks])
        if len(b_title) > 1:
            colortext.warning("There are multiple titles associated with benchmark {0}: '{1}'. Choosing the most recent ('{2}').".format(b_name, "', '".join(b_title), db_benchmarks[0].title or ''))
            b_title = db_benchmarks[0].title
        else:
            b_title = b_title.pop() or None

        benchmark = Benchmark(b_name, b_title)

        for db_benchmark in db_benchmarks:
            # Fill in the benchmark data structure from the database.
            print "Loading the {0} benchmark (id {1}) from the database...".format(benchmark.name, db_benchmark.id)

            for db_input in db_benchmark.input_pdbs:
                path = db_input.pdb_path
                if not benchmark.loops.get(path):
                    benchmark.loops[path] = Loop(benchmark, path)

            for structure in db_benchmark.structures:
                loop = benchmark.loops[structure.input_tag]
                # Model ids are 1-based and assigned per loop, in load order.
                id = len(loop.models) + 1
                score = structure.score_features.score
                rmsd = structure.rmsd_features.protein_backbone
                runtime = structure.runtime_features.elapsed_time
                model = Model(loop, id, score, rmsd, runtime)
                loop.models.append(model)

        return benchmark
def complete_benchmark(benchmark_id, nstruct=None):
    """Top up an existing benchmark with extra jobs for under-sampled inputs.

    benchmark_id -- here used as the benchmark *name*: it is passed to
                    get_progress and compared against Benchmarks.name below.
    nstruct -- accepted for interface symmetry but immediately overwritten by
               the value recorded in the progress data.
    """
    # NOTE(review): qsub_command and benchmark_command appear unused in the
    # visible part of this function — possibly leftovers from resume_benchmark.
    qsub_command = 'qsub',
    benchmark_command = 'loop_benchmark.py', benchmark_id

    # You get weird errors if you forget to cast nstruct from string to int.

    # Get the progress data for the job
    progress_data = get_progress(settings.db_name, benchmark_id)

    # Set up nstruct
    nstruct = progress_data['nstruct']
    if not nstruct:
        sys.exit('The nstruct variable is not set for this benchmark. Exiting.')

    # Set up the bins for structures that need extra jobs to be run. We run extra jobs in case these fail as well.
    # Each bin key is the number of extra jobs to submit; inputs are assigned
    # to a bin based on how many structures they are still missing.
    bins = {5 : [], 10 : [], 20 : [], 30 : []}
    d_bins = bins.keys()
    for input_tag, finished_count in progress_data['CountPerStructure'].iteritems():
        if finished_count < nstruct:
            missing_count = nstruct - finished_count
            if missing_count <= 2: bins[5].append(input_tag)
            elif missing_count <= 5: bins[10].append(input_tag)
            elif missing_count <= 10: bins[20].append(input_tag)
            elif missing_count <= 15: bins[30].append(input_tag)
            else:
                # For larger shortfalls, create a new bin rounded up in steps
                # of 20 with 10 extra jobs of padding.
                bin_size = ((int((missing_count - 11)/20.0) + 2) * 20) + 10
                bins[bin_size] = bins.get(bin_size, [])
                bins[bin_size].append(input_tag)
    # Drop the predefined bins that ended up empty.  (d_bins snapshots the
    # original keys so the dict is not mutated while iterating over it.)
    for d_bin in d_bins:
        if not bins[d_bin]:
            del bins[d_bin]

    with database.connect() as session:
        name = benchmark_id
        benchmark_records = [r for r in session.query(database.Benchmarks).filter(database.Benchmarks.name == benchmark_id)]
        print('')
        # Collect the per-run settings.  All fields except rosetta_script_vars
        # are reduced to sets so that ambiguity across runs can be detected;
        # rosetta_script_vars holds parsed JSON (unhashable), so it stays a
        # list and is compared pairwise below.
        benchmark_variables = dict(
            rosetta_script = set([r.rosetta_script for r in benchmark_records]),
            rosetta_script_vars = [json.loads(r.rosetta_script_vars) for r in benchmark_records],
            rosetta_flags = set([r.rosetta_flags for r in benchmark_records]),
            rosetta_fragments = set([r.rosetta_fragments for r in benchmark_records]),
            fast = set([r.fast for r in benchmark_records]),
            non_random = set([r.non_random for r in benchmark_records]),
        )
        # All previous runs must agree on the RosettaScript variables.
        for x in range(0, len(benchmark_variables['rosetta_script_vars']) - 1):
            if benchmark_variables['rosetta_script_vars'][x] != benchmark_variables['rosetta_script_vars'][x + 1]:
                sys.exit('Exception (ambiguity): The benchmark {0} has multiple RosettaScript variable values associated with previous runs: "{1}".'.format(benchmark_id, '", "'.join(map(str, sorted(benchmark_variables['rosetta_script_vars'])))))
        # Collapse each set to its single value, dying on missing or
        # ambiguous data.
        for k, v in sorted(benchmark_variables.iteritems()):
            if len(v) == 0:
                sys.exit('Exception (missing data): The benchmark {0} has no {1} values associated with previous runs.'.format(benchmark_id, k.replace('_', ' ')))
            elif k == 'rosetta_script_vars':
                benchmark_variables[k] = benchmark_variables[k][0]
            elif len(v) > 1:
                sys.exit('Exception (ambiguity): The benchmark {0} has multiple {1} values associated with previous runs: "{2}".'.format(benchmark_id, k.replace('_', ' '), '", "'.join(sorted(v))))
            else:
                benchmark_variables[k] = v.pop()

    # Resubmit one batch per bin, reusing the settings recovered above.
    for nstruct, pdbs in reversed(sorted(bins.iteritems())): # start the longer jobs first
        run_benchmark(name, benchmark_variables['rosetta_script'], pdbs,
                vars=benchmark_variables['rosetta_script_vars'], flags=benchmark_variables['rosetta_flags'],
                fragments=benchmark_variables['rosetta_fragments'], nstruct=nstruct, desc=None,
                fast=benchmark_variables['fast'], non_random=benchmark_variables['non_random'])