def has_interface(self, pose: pyrosetta.Pose, interface: str) -> bool:
    """
    Tell whether the chains present in a pose are exactly those of ``interface``.

    :param pose: pose to inspect; when ``None`` is passed, ``self.pose`` is used.
    :param interface: chain letters, underscores are ignored (e.g. ``'A_B'``).
    :return: True when the set of chain identifiers found in the pose equals
        the set of characters of ``interface`` with underscores removed.
    """
    target = self.pose if pose is None else pose
    to_pdb = target.pdb_info().pose2pdb
    # the second whitespace-separated token of pose2pdb's output is used
    # as the chain identifier of each residue
    found_chains = set()
    for index in range(1, target.total_residue() + 1):
        found_chains.add(to_pdb(index).split()[1])
    expected_chains = set(interface.replace('_', ''))
    return found_chains == expected_chains
def add_bfactor_from_score(pose: pyrosetta.Pose):
    """
    Adds the bfactors from total_score.

    The per-residue total energies are shifted so that the lowest value is 0
    and then scaled by ``100 / max``. Residues whose energy could not be read
    get a B-factor of 0; NaNs produced by the scaling itself become 100.
    The value is written to every atom of the residue in the pose's PDBInfo.

    Snippet for testing in Jupyter

    >>> import nglview as nv
    >>> view = nv.show_rosetta(pose)
    >>> # view = nv.show_file('test.cif')
    >>> view.clear_representations()
    >>> view.add_tube(radiusType="bfactor", color="bfactor", radiusScale=0.10, colorScale="RdYlBu")
    >>> view

    ``replace_res_remap_bfactors`` may have been a cleaner strategy. This was quicker to write.

    If this fails, it may be because the pose was not scored first.

    :param pose: the pose to modify in place.
    :raises ValueError: if the pose's pdb_info is flagged as obsolete.
    """
    if pose.pdb_info().obsolete():
        raise ValueError(
            'Pose pdb_info is flagged as obsolete (change `pose.pdb_info().obsolete(False)`)'
        )
    # scores
    energies = pose.energies()
    total_score = pyrosetta.rosetta.core.scoring.ScoreType.total_score
    n_residues = pose.total_residue()

    def get_res_score(res):
        # best effort: a residue whose energies cannot be read is marked NaN.
        # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still propagate.
        try:
            return energies.residue_total_energies(res)[total_score]
        except Exception:
            return float('nan')

    # the array goes from zero (nan) to n_residues, so it can be indexed
    # directly with the 1-based residue number
    total_scores = np.array(
        [float('nan')] +
        [get_res_score(res) for res in range(1, n_residues + 1)])
    mask = np.isnan(total_scores)  # entries that had no score to begin with
    total_scores -= np.nanmin(total_scores)
    total_scores *= 100 / np.nanmax(total_scores)
    total_scores = np.nan_to_num(total_scores, nan=100)
    total_scores[mask] = 0.
    # add to pose
    pdb_info = pose.pdb_info()
    for res in range(1, n_residues + 1):
        for atom in range(1, pose.residue(res).natoms() + 1):
            pdb_info.bfactor(res, atom, total_scores[res])
def pose_from_sequence(seq, res_type='fa_standard', name='', chain_id='A'):
    """
    Returns a pose generated from amino acid single letters in <seq>
    using the <res_type> ResidueType.

    Every protein residue gets phi=-150, psi=150 and omega=180. A fresh
    PDBInfo sized to the pose is attached and named <name>; when <name> is
    empty, the first four characters of <seq> are used instead.

    NOTE: <chain_id> is accepted but not applied by this function; no chain
    or residue numbering is written to the PDBInfo here.

    example:
        pose=pose_from_sequence('LIGAND')
    See also:
        Pose
        make_pose_from_sequence
        pose_from_file
        pose_from_rcsb
    """
    new_pose = Pose()
    make_pose_from_sequence(new_pose, seq, res_type)

    # create the PDBInfo object up front; it is attached after the torsions
    info = PDBInfo(new_pose.total_residue())

    for resid in range(1, new_pose.total_residue() + 1):
        if not new_pose.residue(resid).is_protein():
            continue
        # set to a more reasonable default
        new_pose.set_phi(resid, -150)
        new_pose.set_psi(resid, 150)
        new_pose.set_omega(resid, 180)

    # set the PDBInfo
    new_pose.pdb_info(info)

    # default name: the first four characters of the sequence
    label = name if name else seq[:4]
    new_pose.pdb_info().name(label)
    return new_pose
dest='residues', default='', # default to the median residue number help='the (pose numbered) residues to inspect carefully') (options, args) = parser.parse_args() # PDB file option pdb_filename = options.pdb_filename # create a pose from the desired PDB file # create an empty Pose object pose = Pose() # load the data from pdb_file into the pose pose_from_file(pose, pdb_filename) # default to the median residue number residues = options.residues if not options.residues: residues = [int(pose.total_residue() / 2)] elif options.residues == 'all': # accept the word 'all' in place of a residue list residues = range(1, pose.total_residue() + 1) else: # please provide the residues of interest as, delimited residues = [int(r) for r in options.residues.split(',')] pose_structure(pose, residues) ################################################################################ # ALTERNATE SCENARIOS ################################# # Obtaining and Editing PDB files """
def packer_task(pose, PDB_out=False):
    """
    Demonstrates the syntax necessary for basic usage of the PackerTask object.

    Performs demonstrative sidechain packing and selected design using <pose>
    and writes structures to PDB files ('packed.pdb', 'designed.pdb') if
    <PDB_out> is True. <pose> itself is only scored; all packing and design
    is applied to a copy. A resfile named 'sample_resfile' is written as part
    of the design demonstration.
    """
    # create a copy of the pose
    test_pose = Pose()
    test_pose.assign(pose)

    # this object is contained in PyRosetta v2.0 and above
    pymover = PyMOLMover()

    # create a standard ScoreFunction
    scorefxn = get_fa_scorefxn()

    ############
    # PackerTask
    # a PackerTask encodes preferences and options for sidechain packing, an
    #    effective Rosetta methodology for changing sidechain conformations,
    #    and design (mutation)
    # a PackerTask stores information on a per-residue basis
    # each residue may be packed or designed
    # PackerTasks are handled slightly differently in PyRosetta: constructing
    #    PackerTask() directly will not work properly, use the helper instead
    # the pose argument tells the PackerTask how large it should be
    pose_packer = standard_packer_task(test_pose)

    # sidechain packing "optimizes" a pose's sidechain conformations by
    #    cycling through (Dunbrack) rotamers (sets of chi angles) at a
    #    specific residue and selecting the rotamer which achieves the lowest
    #    score, enumerating all possibilities for all sidechains
    #    simultaneously is impractically expensive so the residues to be
    #    packed are individually optimized in a "random" order
    # packing options include:
    #    -"freezing" the residue, preventing it from changing conformation
    #    -including the original sidechain conformation when determining the
    #        lowest scoring conformation
    pose_packer.restrict_to_repacking()  # turns off design
    pose_packer.or_include_current(True)  # considers original conformation
    print(pose_packer)

    # packing and design can be performed by a PackRotamersMover, it requires
    #    a ScoreFunction, for optimizing the sidechains and a PackerTask,
    #    setting the packing and design options
    packmover = protocols.minimization_packing.PackRotamersMover(
        scorefxn, pose_packer)

    scorefxn(pose)  # to prevent verbose output on the next line
    print('\nPre packing score:', scorefxn(test_pose))
    test_pose.pdb_info().name('original')  # for PyMOLMover
    pymover.apply(test_pose)

    packmover.apply(test_pose)
    print('Post packing score:', scorefxn(test_pose))
    test_pose.pdb_info().name('packed')  # for PyMOLMover
    pymover.apply(test_pose)
    if PDB_out:
        test_pose.dump_pdb('packed.pdb')

    # since the PackerTask specifies how the sidechains change, it has been
    #    extended to include sidechain constitutional changes allowing
    #    protein design, this method of design is very similar to sidechain
    #    packing; all rotamers of the possible mutants at a single residue
    #    are considered and the lowest scoring conformation is selected
    # design options include:
    #    -allow all amino acids
    #    -allow all amino acids except cysteine
    #    -allow specific amino acids
    #    -prevent specific amino acids
    #    -allow polar amino acids only
    #    -prevent polar amino acids
    #    -allow only the native amino acid
    # the myriad of packing and design options can be set manually or, more
    #    commonly, using a specific file format known as a resfile
    #    resfile syntax is explained at:
    #    http://www.rosettacommons.org/manuals/archive/rosetta3.1_user_guide/file_resfiles.html
    # manually setting design options is tedious, the methods below are handy
    #    for creating resfiles

    # mutate the "middle" residues: a 5-residue window around the center
    center = test_pose.total_residue() // 2
    specific_design = {i: 'ALLAA' for i in range(center - 2, center + 3)}

    # write a resfile to perform these mutations
    generate_resfile_from_pose(test_pose, 'sample_resfile', False,
                               specific=specific_design)

    # setup the design PackerTask, use the generated resfile
    pose_design = standard_packer_task(test_pose)
    rosetta.core.pack.task.parse_resfile(test_pose, pose_design,
                                         'sample_resfile')
    print(pose_design)

    # prepare a new structure
    test_pose.assign(pose)

    # perform design
    designmover = protocols.minimization_packing.PackRotamersMover(
        scorefxn, pose_design)
    # NOTE: this message used to be split with a backslash continuation
    #    inside the string literal, which leaked the source indentation into
    #    the printed text; it is now a single clean literal
    print(
        '\nDesign with all proteogenic amino acids at (pose numbered) residues',
        center - 2, 'to', center + 2)
    print('Pre-design score:', scorefxn(test_pose))
    print('Pre-design sequence: ...' +
          test_pose.sequence()[center - 5:center + 4] + '...')

    designmover.apply(test_pose)  # perform design
    print('\nPost-design score:', scorefxn(test_pose))
    print('Post-design sequence: ...' +
          test_pose.sequence()[center - 5:center + 4] + '...')
    test_pose.pdb_info().name('designed')  # for PyMOLMover
    pymover.apply(test_pose)
    if PDB_out:
        test_pose.dump_pdb('designed.pdb')
# TODO: rename pose_from_file or make_pose_from_sequence to be parallel
# Build two poses from one-letter sequences: one centroid, one full-atom.
print('Building Pose from sequence...')
pose3 = pose_from_sequence("DSEEKFLRRIGRFGYGYGPYE", 'centroid')
print(pose3)
pose4 = pose_from_sequence("ARNDCEQGHILKMFPSTWYV", 'fa_standard')

# `pose` is expected to have been created earlier in this script.
print('Dump PDB...')
dump_pdb(pose, "T110_Basic._.pdb")

print('accessing pose attributes')
print(pose)
# TODO: remove extra blank lines at end

print('there are ', pose.total_residue(), 'residues in this pose object')
print('phi of residue 5 is ', pose.phi(5))
print('psi of residue 5 is ', pose.psi(5))

# The calls below previously targeted residue 1 although the messages (and
# the reads just above) refer to residue 5; they now act on residue 5.
print('set phi of residue 5 to -60')
pose.set_phi(5, -60)
print('set psi of residue 5 to -50')
pose.set_psi(5, -50)

print('accessing residue 5 from pose')
res5 = pose.residue(5)
print(res5)

print('accessing atoms from residue 5')
at5N = res5.atom('N')
at5CA = res5.atom("CA")
def calc_binding_energy(pose, scorefxn, center, cutoff=8.0):
    """
    Score a repacked copy of <pose> before and after pulling it apart.

    Only residues whose neighbor atom lies within <cutoff> Angstroms of the
    neighbor atom of residue <center> are repacked. The copy is repacked and
    scored with <scorefxn>, every residue after the first chain is translated
    by 500 Angstroms along x, then the copy is repacked and scored again.

    Returns the score before separation minus the score after separation.
    <pose> itself is not modified.
    """
    # work on a copy so that the caller's pose is left untouched
    work_pose = Pose()
    work_pose.assign(pose)

    # packer options: sidechains "near" the residue of interest may change
    #    conformation and must be repacked; repacking the same neighborhood
    #    for mutant and wild-type keeps unrelated sidechain noise out of the
    #    score difference
    # this method of setting up a PackerTask is different from packer_task.py
    task_factory = standard_task_factory()
    # no design, repacking only
    task_factory.push_back(core.pack.task.operation.RestrictToRepacking())

    # rather than switching residues "On" or "Off" directly, collect the
    #    residues that must stay fixed in an operation given to the factory
    frozen = core.pack.task.operation.PreventRepacking()

    # the "center" (nbr_atom) of the residue of interest, for distances
    center_xyz = work_pose.residue(center).nbr_atom_xyz()
    # compare squared distances: .distance_squared is (a little) lighter
    #    than .norm
    cutoff_squared = cutoff**2
    for resid in range(1, work_pose.total_residue() + 1):
        neighbor_xyz = work_pose.residue(resid).nbr_atom_xyz()
        # further than <cutoff> Angstroms away: do not repack
        if center_xyz.distance_squared(neighbor_xyz) > cutoff_squared:
            frozen.include_residue(resid)

    task_factory.push_back(frozen)

    # setup a PackRotamersMover driven by the TaskFactory
    packer = protocols.minimization_packing.PackRotamersMover(scorefxn)
    packer.task_factory(task_factory)

    # NOTE: a RigidBodyTransMover is deliberately not used for the
    #    separation; the original author reported that it did not perform a
    #    simple translation and that packing afterwards produced an
    #    "Hbond Tripped" error

    # repack, then score the bound structure
    packer.apply(work_pose)
    bound_score = scorefxn(work_pose)

    # translation vector; every component is set explicitly because the
    #    original author observed a huge default value when one was left out
    shift = rosetta.numeric.xyzVector_double_t()
    shift.x = 500.0  # arbitrary separation magnitude, in the x direction
    shift.y = 0.0
    shift.z = 0.0

    # hacky separation of the downstream partner (must be two-body!):
    #    move every atom of every residue following the first chain
    # an alternative noted by the original author is to copy jump 1 with
    #    pose.jump(1), adjust its translation and write it back with
    #    set_jump; modifying the object returned by pose.jump(1) in place
    #    is not permanently applied
    second_chain_start = len(pose.chain_sequence(1)) + 1
    for resid in range(second_chain_start, work_pose.total_residue() + 1):
        for atom in range(1, work_pose.residue(resid).natoms() + 1):
            moved = work_pose.residue(resid).xyz(atom) + shift
            work_pose.residue(resid).set_xyz(atom, moved)

    # repack again now that the partners are separated
    packer.apply(work_pose)

    # the change in score upon separation
    return bound_score - scorefxn(work_pose)
def scanning(pdb_filename,
             partners,
             mutant_aa='A',
             interface_cutoff=8.0,
             output=False,
             trials=1,
             trial_output=''):
    """
    Performs "scanning" at an interface within <pdb_filename> between
    <partners> by mutating each interface residue to <mutant_aa> and
    computing an interaction-dependent score change with interface_ddG,
    which is given <interface_cutoff> as its cutoff.

    Interface residues are those reported by an Interface object built on
    jump 1 with distance <interface_cutoff>. <trials> scans are performed
    (to average results) with summaries written to
    <trial_output>_(trial#).txt. If <output> is true, a per-mutant filename
    derived from the pose name is passed to interface_ddG; otherwise an empty
    filename is passed. The starting structure is sent to a PyMOL instance.
    """
    # 1. create a pose from the desired PDB file
    pose = Pose()
    pose_from_file(pose, pdb_filename)

    # 2. setup the docking FoldTree and other related parameters
    dock_jump = 1
    movable_jumps = Vector1([dock_jump])
    protocols.docking.setup_foldtree(pose, partners, movable_jumps)

    # 3. create ScoreFunctions for the Interface and "ddG" calculations
    # the pose's Energies objects MUST be updated for the Interface object to
    #    work normally
    scorefxn = get_fa_scorefxn()
    scorefxn(pose)  # needed for proper Interface calculation

    # setup a "ddG" ScoreFunction, custom weights
    ddG_scorefxn = ScoreFunction()
    ddG_scorefxn.set_weight(core.scoring.fa_atr, 0.44)
    ddG_scorefxn.set_weight(core.scoring.fa_rep, 0.07)
    ddG_scorefxn.set_weight(core.scoring.fa_sol, 1.0)
    ddG_scorefxn.set_weight(core.scoring.hbond_bb_sc, 0.5)
    ddG_scorefxn.set_weight(core.scoring.hbond_sc, 1.0)

    # 4. create an Interface object for the pose
    interface = Interface(dock_jump)
    interface.distance(interface_cutoff)
    interface.calculate(pose)

    # 5. create a PyMOLMover for sending output to PyMOL (optional)
    pymover = PyMOLMover()
    pymover.keep_history(True)  # for multiple trajectories
    pymover.apply(pose)
    pymover.send_energy(pose)

    # 6. perform scanning trials
    # the large number of packing operations introduces a lot of variability,
    #    for best results, perform several trials and average the results,
    #    these score changes are useful to QUALITATIVELY defining "hotspot"
    #    residues
    # this script does not use a PyJobDistributor since no PDB files are output
    for trial in range(trials):
        # store the ddG values in a dictionary keyed by pose residue number
        ddG_mutants = {}
        for i in range(1, pose.total_residue() + 1):
            # only residues at the interface are scanned
            if not interface.is_interface(i):
                continue
            # an empty filename (the default) TURNS OFF structure output
            filename = ''
            if output:
                filename = (pose.pdb_info().name()[:-4] + '_' +
                            pose.sequence()[i - 1] +
                            str(pose.pdb_info().number(i)) + '->' + mutant_aa)

            # determine the interface score change upon mutation
            ddG_mutants[i] = interface_ddG(pose, i, mutant_aa, movable_jumps,
                                           ddG_scorefxn, interface_cutoff,
                                           filename)

        # output results
        print('=' * 80)
        print('Trial', str(trial + 1))
        print(
            'Mutants (PDB numbered)\t\"ddG\" (interaction dependent score change)'
        )
        residues = sorted(ddG_mutants)  # easier to read
        display = [
            pose.sequence()[i - 1] + str(pose.pdb_info().number(i)) +
            mutant_aa + '\t' + str(ddG_mutants[i]) + '\n' for i in residues
        ]
        print(''.join(display)[:-1])  # drop the trailing newline
        print('=' * 80)

        # write to file; the context manager closes the handle even if the
        #    write fails
        with open(trial_output + '_' + str(trial + 1) + '.txt', 'w') as f:
            f.writelines(display)

    #### alternate output using scanning_analysis, only display
    ####    mutations with "deviant" score changes
    print('Likely Hotspot Residues')
    for hotspot in scanning_analysis(trial_output):
        print(hotspot)
    print('=' * 80)