def main():
    args = docopt.docopt(__doc__)
    prefix = args['--prefix'] or ''

    workspaces = find_validation_workspaces(
            args['<workspace>'], args['<round>'])
    designs = find_reasonable_designs(
            workspaces, args['--threshold'], args['--verbose'])
    metrics = [
            DesignNameMetric(),
            ResfileSequenceMetric(),
            SequenceClusterMetric(args['--subs-matrix']),
            StructureClusterMetric(args['--structure-threshold']),
            RestraintDistMetric(),
            ScoreGapMetric(),
            PercentSubangstromMetric(),
            BuriedUnsatHbondMetric(),
            DunbrackScoreMetric(),
    ]

    discover_filter_metrics(metrics, workspaces)
    #discover_custom_metrics(metrics, workspaces)
    calculate_quality_metrics(metrics, designs, args['--verbose'])
    designs = find_pareto_optimal_designs(designs, metrics, args['--verbose'])

    report_quality_metrics(designs, metrics, prefix + 'quality_metrics.xlsx')
    #report_score_vs_rmsd_funnels(designs, prefix + 'score_vs_rmsd.pdf')
    #report_pymol_sessions(designs, prefix + 'pymol_sessions')
    annotate_designs(designs)

def main():
    args = docopt.docopt(__doc__)
    cluster.require_qsub()

    # Setup the workspace.
    workspace = pipeline.FixbbDesigns(args['<workspace>'], args['<round>'])
    workspace.check_paths()
    workspace.check_rosetta()
    workspace.make_dirs()

    if args['--clear'] or args['--test-run']:
        workspace.clear_outputs()

    # Decide which inputs to use.
    inputs = workspace.unclaimed_inputs
    nstruct = len(inputs) * int(args['--nstruct'])

    if not inputs:
        print """\
All the input structures have already been (or are already being) designed.
If you want to rerun all the inputs from scratch, use the --clear flag."""
        raise SystemExit

    # Submit the design job.
    big_jobs.submit(
            'pip_design.py', workspace,
            inputs=inputs,
            nstruct=nstruct,
            max_runtime=args['--max-runtime'],
            max_memory=args['--max-memory'],
            test_run=args['--test-run'],
    )

def main():
    args = docopt.docopt(__doc__)
    cluster.require_qsub()

    workspace = pipeline.workspace_from_path(args['<workspace>'])
    workspace.check_paths()
    workspace.make_dirs()
    workspace.clear_fragments()

    # Run the fragment generation script.
    generate_fragments = [
            'klab_generate_fragments',
            workspace.input_pdb_path,
            '--outdir', workspace.fragments_dir,
            '--memfree', args['--mem_free'],
    ]
    if not args['--ignore-loop-file']:
        generate_fragments += [
            '--loops_file', workspace.loops_path,
        ]

    if args['--dry-run']:
        print(' '.join(generate_fragments))
    else:
        subprocess.call(generate_fragments)

def main():
    args = docopt.docopt(__doc__)
    root = args['<workspace>']
    round = args['<round>']
    output_path = args['<pdf_output>']

    # Right now I'm looking at validated designs by default, but the user may
    # be interested in fixbb designs or restrained models as well.
    workspace = pipeline.ValidatedDesigns(root, round)
    workspace.check_paths()

    designs = [structures.Design(x) for x in workspace.output_subdirs]
    sequences = corebio.seq.SeqList(
            [corebio.seq.Seq(x.resfile_sequence) for x in designs],
            alphabet=corebio.seq.unambiguous_protein_alphabet,
    )

    logo_data = weblogo.LogoData.from_seqs(sequences)
    logo_options = weblogo.LogoOptions()
    logo_options.title = workspace.focus_dir
    logo_format = weblogo.LogoFormat(logo_data, logo_options)

    with open(output_path, 'wb') as logo_file:
        document = weblogo.pdf_formatter(logo_data, logo_format)
        logo_file.write(document)

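# NOTE: An illustrative sketch, not part of the pipeline.  The same
# logo_data/logo_format pair built above can be rendered to other formats;
# weblogo 3 ships several formatter functions alongside pdf_formatter.  The
# output file names below are hypothetical, and the availability of these
# formatters is an assumption about the weblogo version in use.

with open('logo.eps', 'wb') as eps_file:
    # Same (data, format) calling convention as weblogo.pdf_formatter above.
    eps_file.write(weblogo.eps_formatter(logo_data, logo_format))
with open('logo.png', 'wb') as png_file:
    png_file.write(weblogo.png_formatter(logo_data, logo_format))
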
def main():
    args = docopt.docopt(__doc__)
    cluster.require_qsub()

    workspace = pipeline.ValidatedDesigns(args['<workspace>'], args['<round>'])
    workspace.check_paths()
    workspace.check_rosetta()
    workspace.make_dirs()
    workspace.clear_fragments()

    # Run the fragment generation script.
    generate_fragments = [
            'klab_generate_fragments',
            '--loops_file', workspace.loops_path,
            '--outdir', workspace.fragments_dir,
            '--memfree', args['--mem-free'],
            workspace.input_dir,
    ]

    if args['--dry-run']:
        print ' '.join(generate_fragments)
    else:
        subprocess.call(generate_fragments)

def main():
    args = docopt.docopt(__doc__)
    pipeline.fetch_data(
            args['<directory>'],
            args['--remote'],
            args['--include-logs'],
            args['--dry-run'],
    )

def discover_filter_metrics(metrics, workspaces):
    for workspace in workspaces:
        # Use the workspace being iterated over; the original re-parsed the
        # command line and looked up the same workspace on every pass, which
        # ignored the loop variable entirely.
        filter_list = workspace.filters_list
        filters = []
        with open(filter_list) as f:
            # safe_load avoids executing arbitrary YAML tags; a bare
            # yaml.load() without a Loader is deprecated and unsafe.
            filters = yaml.safe_load(f)
        if filters:
            for record in filters:
                filterclass = ExtraFilterHandler(record)
                metrics.append(filterclass)

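# NOTE: A minimal sketch of the duck-typed interface that
# discover_filter_metrics() appears to rely on: each YAML record is wrapped
# in an object that can sit in the same metrics list as the built-in metric
# classes above.  The attribute and method names here ('name', 'calculate')
# and the record layout are assumptions, not the actual ExtraFilterHandler
# API.

class ExtraFilterHandler(object):

    def __init__(self, record):
        # 'record' is one entry from the filters YAML file, e.g. a dict
        # like {'name': 'buried_unsats', 'direction': '-'} (hypothetical).
        self.record = record
        self.name = record.get('name', 'unnamed_filter')

    def calculate(self, designs):
        # Pull a pre-computed filter score out of each design; the attribute
        # lookup below is a placeholder for however scores are actually
        # stored on the design objects.
        return [getattr(design, 'filter_scores', {}).get(self.name)
                for design in designs]
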
def main():
    args = docopt.docopt(__doc__)
    num_models = 0

    for directory in args['<directories>']:
        records = structures.load(
                directory, args['--restraints'], not args['--recalc'])
        if args['--query']:
            records = records.query(args['--query'])
        num_models += len(records)

    print num_models

def main():
    args = docopt.docopt(__doc__)

    # Setup the workspace.
    workspace = pipeline.ValidatedDesigns(args['<workspace>'], args['<round>'])
    workspace.check_paths()
    workspace.make_dirs()

    if args['--clear']:
        workspace.clear_inputs()

    # Copy the manual designs into the input directory.
    for source_path in args['<pdbs>']:
        dest_path = os.path.join(
                workspace.input_dir, os.path.basename(source_path))
        shutil.copy(source_path, dest_path)

def main():
    args = docopt.docopt(__doc__)
    cluster.require_qsub()

    # Setup the workspace.
    workspace = pipeline.ValidatedDesigns(args['<workspace>'], args['<round>'])
    workspace.check_paths()
    workspace.make_dirs()

    if args['--clear'] or args['--test-run']:
        workspace.clear_outputs()

    # Setup an output directory for each input.
    inputs = workspace.unclaimed_inputs
    nstruct = len(inputs) * int(args['--nstruct'])

    if nstruct == 0:
        scripting.print_error_and_die("""\
No unclaimed input files.

If you previously started a round of simulations and then stopped them for
some reason, the problem is probably that all the inputs are still claimed
by those simulations.  Use the '--clear' flag to remove the claims and try
again.""")

    for input in inputs:
        subdir = workspace.output_subdir(input)
        scripting.clear_directory(subdir)

    # Launch the validation job.
    big_jobs.submit(
            'pip_validate.py', workspace,
            inputs=inputs,
            nstruct=nstruct,
            max_runtime=args['--max-runtime'],
            max_memory=args['--max-memory'],
            test_run=args['--test-run'],
    )

def main():
    args = docopt.docopt(__doc__)
    # float() is a safer way to parse a number off the command line than the
    # eval() call used previously.
    wait_time = 60 * float(args['--wait-time'])

    while True:
        pipeline.fetch_and_cache_data(
                args['<directory>'],
                args['--remote'],
                args['--include-logs'],
        )
        if not args['--keep-going']:
            break
        print "Waiting {} min...".format(int(wait_time // 60))
        time.sleep(wait_time)

def main():
    arguments = docopt.docopt(__doc__)
    cluster.require_qsub()

    # Setup the workspace.
    workspace = pipeline.RestrainedModels(arguments['<workspace>'])
    workspace.check_paths()
    workspace.check_rosetta()
    workspace.make_dirs()

    if arguments['--clear'] or arguments['--test-run']:
        workspace.clear_outputs()

    # Submit the model building job.
    big_jobs.submit(
            'pip_build.py', workspace,
            nstruct=arguments['--nstruct'],
            max_runtime=arguments['--max-runtime'],
            max_memory=arguments['--max-memory'],
            test_run=arguments['--test-run'],
    )

def main():
    args = docopt.docopt(__doc__)
    print structures.load(
            args['<directory>'],
            args['--restraints'],
            not args['--recalc'],
    ).head()

def main():
    args = docopt.docopt(__doc__)
    pipeline.push_data(args['<directory>'], args['--remote'], args['--dry-run'])

def main():
    arguments = docopt.docopt(__doc__)
    workspace = pipeline.Workspace(arguments['<workspace>'])

    # Make a new workspace directory.
    if workspace.incompatible_with_fragments_script:
        scripting.print_error_and_die("""\
Illegal character(s) found in workspace path:

  {}

The full path to a workspace must contain only characters that are
alphanumeric or '.' or '_'.  The reason for this ridiculous rule is the
fragment generation script, which will silently fail if the full path to its
input file contains any characters but those.""", workspace.abs_root_dir)

    if workspace.exists():
        if arguments['--overwrite']:
            shutil.rmtree(workspace.root_dir)
        else:
            scripting.print_error_and_die("""\
Design '{0}' already exists.  Use '-o' to overwrite.""", workspace.root_dir)

    workspace.make_dirs()

    # Decide which settings to ask for.
    if arguments['--remote']:
        installers = (
                RosettaDir,
                RsyncUrl,
        )
    else:
        installers = (
                RosettaDir,
                InputPdb,
                LoopsFile,
                Resfile,
                RestraintsFile,
                ScoreFunction,
                BuildScript,
                DesignScript,
                ValidateScript,
                FilterScript,
                SharedDefs,
                FlagsFile,
        )

    # Get the necessary settings from the user and use them to fill in the
    # workspace.
    print "Please provide the following pieces of information:"
    print

    scripting.use_path_completion()

    for installer in installers:
        # If the installer doesn't have a prompt, just install it without
        # asking any questions.
        if installer.prompt is None:
            installer.install(workspace)
            continue

        # Otherwise, print a description of the setting being installed and
        # prompt the user for a value.
        print installer.description
        print

        while True:
            try:
                setting = raw_input(installer.prompt)
                installer.install(workspace, setting)
            except (ValueError, IOError) as problem:
                print problem
                continue
            except (KeyboardInterrupt, EOFError):
                shutil.rmtree(workspace.root_dir)
                scripting.print_error_and_die(
                        "\nReceived exit command, no workspace created.")
            else:
                break

        print

    # If we made a link to a remote workspace, immediately try to synchronize
    # with it.  Rsync will say whether or not it succeeded.  Otherwise just
    # print a success message.
    if arguments['--remote']:
        pipeline.fetch_data(workspace.root_dir)
    else:
        print "Setup successful for design '{0}'.".format(workspace.root_dir)

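# NOTE: A minimal sketch of the installer interface the wizard loop above
# expects: a class-level 'prompt' and 'description', plus an install() that
# raises ValueError/IOError on bad input so the loop can re-prompt.  This is
# not the real LoopsFile class; the prompt text, description, destination
# file name, and use of a classmethod are all assumptions for illustration.
# It reuses the os/shutil imports already present in this script.

class LoopsFile(object):
    prompt = "Path to loops file: "
    description = """\
The loops file specifies which residues will be remodeled."""

    @classmethod
    def install(cls, workspace, setting):
        source = os.path.expanduser(setting)
        if not os.path.exists(source):
            # Raising IOError sends control back to the wizard's re-prompt
            # loop rather than crashing the script.
            raise IOError("'{0}' does not exist.".format(source))
        shutil.copy(source, os.path.join(workspace.root_dir, 'loops'))
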
        reversion_seq[aa_num - first_res] = wt_aa

        name = original_design + "_reversion"
        num_reversion_designs = 1
        # Append a counter to the base name until it is unique, so repeated
        # reversions of the same design don't overwrite each other.  Build
        # from the base name each time; appending to the current name would
        # accumulate digits (e.g. "x_reversion2" becoming "x_reversion23").
        while name in reverted_sequences:
            num_reversion_designs += 1
            name = original_design + "_reversion" + str(num_reversion_designs)
        reverted_sequences[name] = "".join(reversion_seq)

    for design in reverted_sequences:
        protein_sequences[design] = reverted_sequences[design]

    return protein_sequences


arguments = docopt.docopt(__doc__)
inputs = arguments['<input_fasta_or_folder>']
template_dna = arguments['--template-dna']
wt_sequence_file = arguments['--combine-chains']
wt_sequence_str = ''
reversion_mutations = arguments['--reversion-mutations']

if wt_sequence_file:
    wt_sequence_str = import_wt_protein_sequence(wt_sequence_file)

from_files = arguments['--from-pdb-folder']
if from_files:

def main():
    args = docopt.docopt(__doc__)
    root = args['<workspace>']
    round = args['<round>']
    query = ' and '.join(args['<queries>'])
    temp = float(args['--temp'])

    # Import ``pylab`` after handling the help message, because otherwise
    # ``matplotlib`` sometimes issues warnings that then show up in the docs.
    import pylab

    workspace = pipeline.ValidatedDesigns(root, round)
    workspace.check_paths()
    workspace.make_dirs()

    if args['--clear']:
        workspace.clear_inputs()

    predecessor = workspace.predecessor

    # Get sequences and scores for each design.
    seqs_scores = structures.load(
            predecessor.output_dir,
            use_cache=not args['--recalc'],
    )
    seqs_scores.dropna(inplace=True)
    print 'Total number of designs:    ', len(seqs_scores)

    # If a query was given on the command line, find models that satisfy it.
    if query:
        seqs_scores = seqs_scores.query(query)
        print '  minus given query:        ', len(seqs_scores)

    # Keep only the lowest scoring model for each set of identical sequences.
    # (``loc`` replaces the long-deprecated ``ix`` indexer.)
    groups = seqs_scores.groupby('sequence', group_keys=False)
    seqs_scores = groups.\
            apply(lambda df: df.loc[df.total_score.idxmin()]).\
            reset_index(drop=True)
    print '  minus duplicate sequences:', len(seqs_scores)

    # Remove designs that have already been picked.
    existing_inputs = set(
            os.path.basename(os.path.realpath(x))
            for x in workspace.input_paths)
    seqs_scores = seqs_scores.query('path not in @existing_inputs')
    print '  minus current inputs:     ', len(seqs_scores)
    print

    # Use a Boltzmann weighting scheme to pick designs.  Sort and assign back
    # to ``seqs_scores``: the original assigned the sorted frame to a new
    # ``seq_scores`` variable, so the sort was silently discarded and the
    # scores below no longer lined up with their ranks.
    seqs_scores = seqs_scores.sort_values(by='total_score')

    scores = seqs_scores.total_score.values
    scores -= median(scores)

    weights = exp(-scores / temp)
    indices = arange(len(scores))

    pdf = array(weights)
    cdf = cumsum(pdf) / sum(pdf)

    num_to_pick = min(int(args['--num']), len(scores))
    picked_indices = set()

    while len(picked_indices) < num_to_pick:
        choice = random.random()
        picked_index = indices[cdf > choice][0]
        picked_indices.add(picked_index)

    picked_indices = sorted(picked_indices)

    # Show the user the probability distributions used to pick designs.
    raw_input("""\
Press [enter] to view the designs that were picked and the distributions that
were used to pick them.  Pay particular attention to the CDF.  If it is too
flat, the temperature (T={0}) is too high and designs are essentially being
picked randomly.  If it is too sharp, the temperature is too low and only the
highest scoring designs are being picked.
""".format(temp))

    color = '#204a87'   # Tango dark blue
    base_format = dict(color=color)
    picked_format = dict(marker='o', ls='none', mfc=color, mec='none')

    pylab.figure(num=1, figsize=(8, 3))

    pylab.subplot(1, 3, 1)
    pylab.title('Rosetta Scores')
    pylab.plot(indices, scores, **base_format)
    pylab.plot(picked_indices, scores[picked_indices], **picked_format)

    pylab.subplot(1, 3, 2)
    pylab.title('Boltzmann PDF')
    pylab.plot(indices, pdf, **base_format)
    pylab.plot(picked_indices, pdf[picked_indices], **picked_format)
    pylab.yscale('log')

    pylab.subplot(1, 3, 3)
    pylab.title('Boltzmann CDF')
    pylab.plot(indices, cdf, **base_format)
    pylab.plot(picked_indices, cdf[picked_indices], **picked_format)

    pylab.tight_layout()
    pylab.show()

    if raw_input("Accept these picks? [Y/n] ") == 'n':
        print "Aborting."
        sys.exit()

    # Make symlinks to the picked designs.
    if not args['--dry-run']:
        existing_ids = set(
                int(x[0:-len('.pdb.gz')])
                for x in os.listdir(workspace.input_dir))
        next_id = max(existing_ids) + 1 if existing_ids else 0

        for id, picked_index in enumerate(picked_indices, next_id):
            basename = seqs_scores.iloc[picked_index]['path']
            target = os.path.join(predecessor.output_dir, basename)
            link_name = os.path.join(workspace.input_dir, '{0:04}.pdb.gz')
            scripting.relative_symlink(target, link_name.format(id))

    print "Picked {} designs.".format(len(picked_indices))
    if args['--dry-run']:
        print "(Dry run: no symlinks created.)"

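# NOTE: A self-contained sketch of the Boltzmann-weighted selection at the
# heart of the script above, using only numpy.  The function and variable
# names here are illustrative, not part of the pipeline; the algorithm
# (median-shifted scores, exp(-score/T) weights, inverse-transform sampling
# on the CDF) mirrors the one used in main().

import numpy as np

def boltzmann_pick(scores, temp, num_to_pick):
    """Pick distinct indices with probability proportional to exp(-score/temp).

    Lower (better) scores are exponentially more likely to be picked, and
    'temp' tunes how greedy the selection is: high temperatures approach
    uniform random picking, low temperatures pick only the best scores.
    """
    scores = np.asarray(scores, dtype=float)
    order = np.argsort(scores)                  # rank designs best-to-worst
    shifted = scores[order] - np.median(scores) # center for numerical stability

    weights = np.exp(-shifted / temp)
    cdf = np.cumsum(weights) / np.sum(weights)

    picked = set()
    while len(picked) < min(num_to_pick, len(scores)):
        # Inverse-transform sampling: the first rank whose CDF value exceeds
        # a uniform random number is drawn with probability proportional to
        # its weight.  Map the rank back to the original index via 'order'.
        choice = np.random.random()
        picked.add(order[np.searchsorted(cdf, choice, side='right')])
    return sorted(picked)

# Example: pick 3 of 6 designs, favoring the lowest scores.
print(boltzmann_pick([-310.2, -305.7, -301.4, -299.8, -295.0, -290.3],
                     temp=2.0, num_to_pick=3))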