def energy(self, asgl_output=False, normalize_profile=False,
           residue_span_range=(0, 99999), output='LONG', file='default',
           viol_report_cut=physical.values(default=4.5, chi1_dihedral=999,
                                           chi2_dihedral=999,
                                           chi3_dihedral=999,
                                           chi4_dihedral=999,
                                           chi5_dihedral=999,
                                           phi_psi_dihedral=6.5,
                                           nonbond_spline=999,
                                           accessibility=999, density=999,
                                           gbsa=999, em_density=999),
           viol_report_cut2=physical.values(default=2.0),
           smoothing_window=3, schedule_scale=None, edat=None):
    """Evaluate the objective function for this selection.

    When `edat` or `schedule_scale` is None, the corresponding value of
    the model's environment is used instead.

    :return: ``(molpdf, terms)``; `terms` is the term list returned by
             ``_modeller.mod_energy`` after conversion with
             ``physical.from_list``.
    """
    (atom_indices, model) = self.__require_indices()
    environment = model.env
    # Fall back to the environment-wide settings for anything not given
    if edat is None:
        edat = environment.edat
    if schedule_scale is None:
        schedule_scale = environment.schedule_scale
    (molpdf, raw_terms) = _modeller.mod_energy(
        model.modpt, edat.modpt, environment.libs.modpt, atom_indices,
        asgl_output, normalize_profile, residue_span_range, output, file,
        smoothing_window, viol_report_cut, viol_report_cut2, schedule_scale)
    return (molpdf, physical.from_list(raw_terms))
def __init__(self, env, sequence, alnfile=None, knowns=None, inimodel=None,
             deviation=None, library_schedule=None, csrfile=None,
             inifile=None, assess_methods=None, loop_assess_methods=None):
    """Initialise the base loop model, then set up the loop environment.

    All arguments are forwarded unchanged to ``loopmodel.__init__``.
    Afterwards the loop environment gets a schedule scale of 1.0 (0.6 for
    ``nonbond_spline``), its energy data is reconfigured, and a
    ``gbsa.Scorer`` is appended to the energy terms.
    """
    loopmodel.__init__(self, env, sequence, alnfile, knowns, inimodel,
                       deviation, library_schedule, csrfile, inifile,
                       assess_methods, loop_assess_methods)
    loop_env = self.loop.env
    loop_env.schedule_scale = physical.values(default=1.0,
                                              nonbond_spline=0.6)
    energy_settings = loop_env.edat
    # Applied in this order; contact_shell is read back below
    for (attribute, value) in (('contact_shell', 8.00),
                               ('dynamic_sphere', False),
                               ('dynamic_lennard', True),
                               ('dynamic_coulomb', False),
                               ('relative_dielectric', 1.0),
                               ('dynamic_modeller', True)):
        setattr(energy_settings, attribute, value)
    # The GBSA scorer uses the same cutoff as the contact shell
    scorer = gbsa.Scorer(cutoff=energy_settings.contact_shell)
    energy_settings.energy_terms.append(scorer)
def loopschedule():
    """Return a five-step conjugate-gradients schedule.

    The first four steps use scalings built by ``mk_scale``; the last
    step uses ``physical.values(default=4.00)`` directly.
    """
    # (default, nonbond, spline) arguments for the mk_scale-based steps
    scalings = ((1.00, 0.0, 1.00),
                (2.00, 0.01, 0.01),
                (1.00, 0.10, 0.10),
                (1.00, 0.50, 0.50))
    steps = [step(CG, None,
                  mk_scale(default=dflt, nonbond=nonbond, spline=spline))
             for (dflt, nonbond, spline) in scalings]
    steps.append(step(CG, None, physical.values(default=4.00)))
    return schedule(4, steps)
def hot_atoms(self, pick_hot_cutoff, residue_span_range=(0, 99999),
              viol_report_cut=physical.values(default=4.5, chi1_dihedral=999,
                                              chi2_dihedral=999,
                                              chi3_dihedral=999,
                                              chi4_dihedral=999,
                                              chi5_dihedral=999,
                                              phi_psi_dihedral=6.5,
                                              nonbond_spline=999,
                                              accessibility=999, density=999,
                                              gbsa=999, em_density=999),
              schedule_scale=None, edat=None):
    """Build a new selection from the atoms that violate restraints.

    When `edat` or `schedule_scale` is None, the corresponding value of
    the model's environment is used instead.

    :return: The new selection
    :rtype: :class:`selection`"""
    (atom_indices, model) = self.__require_indices()
    environment = model.env
    if edat is None:
        edat = environment.edat
    if schedule_scale is None:
        schedule_scale = environment.schedule_scale
    hot_indices = _modeller.mod_selection_hot_atoms(
        model.modpt, edat.modpt, environment.libs.modpt, atom_indices,
        residue_span_range, pick_hot_cutoff, viol_report_cut, schedule_scale)
    # The result refers to the same model as this selection
    hot = selection()
    hot.__mdl = model
    hot.__selection = dict.fromkeys(hot_indices)
    return hot
def initial_refine_hot(self, atmsel):
    """Run a short conjugate-gradients refinement on violated hotspots.

    Which restraint types get the lowered violation cutoff (4 instead of
    999) depends on ``self.rstrs_defined``; the hot atoms of `atmsel` are
    then expanded to whole residues, the restraints on them are picked,
    and they are optimized for at most 100 iterations.
    """
    cutoffs = physical.values(default=999)
    stereochemical = (physical.bond, physical.angle, physical.dihedral,
                      physical.improper, physical.disulfide_distance,
                      physical.disulfide_angle, physical.disulfide_dihedral)
    homology = (physical.ca_distance, physical.n_o_distance,
                physical.omega_dihedral, physical.sd_mn_distance,
                physical.phi_psi_dihedral, physical.sd_sd_distance)

    level = self.rstrs_defined
    if level == 2:
        # Hotspots with badly violated restraints of any kind
        cutoffs['default'] = 4
    else:
        if level == 0:
            # Only badly violated stereochemical restraints
            lowered = stereochemical
        elif level == 1:
            # Stereochemical plus the important homology-derived restraints
            lowered = stereochemical + homology
        else:
            lowered = ()
        for restraint_type in lowered:
            cutoffs[restraint_type] = 4

    # Whole residues have to be picked because of the sidechains
    hot = atmsel.hot_atoms(pick_hot_cutoff=4.5, viol_report_cut=cutoffs)
    hot = hot.by_residue()

    # Select every restraint (violated or not) on the chosen atoms
    self.restraints.unpick_all()
    self.restraints.pick(hot)

    # A local optimization here prevents MD explosions later
    conjugate_gradients().optimize(hot, max_iterations=100,
                                   output=self.optimize_output)
def __init__(self, env, sequence, alnfile=None, knowns=None, inimodel=None,
             deviation=None, library_schedule=None, csrfile=None,
             inifile=None, assess_methods=None, loop_assess_methods=None,
             refinepot=None, loops=None, calcrmsds='111', nonbond_spine=0.1,
             contact_shell=12.0, deviations=50, energytrace=False,
             assess_trace=True):
    """Initialise the base loop model and the loop-refinement settings.

    The first eleven arguments are forwarded to ``loopmodel.__init__``.

    :param knowns: forwarded to ``loopmodel.__init__``; ``None`` (the
        default) is replaced by a new empty list.
    :param refinepot: stored as ``self.refinepotential``; ``None`` (the
        default) selects ``['$(LIB)/atmcls-mf.lib', '$(LIB)/dist-mf.lib']``.
    :param loops: stored as ``self.loops``; ``None`` (the default) is
        replaced by a new empty list.
    :param calcrmsds: stored as ``self.calc_rmsds``.
    :param nonbond_spine: ``nonbond_spline`` weight of the loop
        environment's schedule scale (all other terms get 1.0).
    :param contact_shell: ``contact_shell`` of the loop energy data.
    :param deviations: stored as ``self.deviations``.
    :param energytrace: stored as ``self.energytrace``.
    :param assess_trace: stored as ``self.assess_trace``.
    """
    # The list defaults used to be literal lists in the signature; those
    # are created once and then shared by every instance that stores them,
    # so each call now gets lists of its own.
    if knowns is None:
        knowns = []
    if refinepot is None:
        refinepot = ['$(LIB)/atmcls-mf.lib', '$(LIB)/dist-mf.lib']
    if loops is None:
        loops = []

    loopmodel.__init__(self, env, sequence, alnfile, knowns, inimodel,
                       deviation, library_schedule, csrfile, inifile,
                       assess_methods, loop_assess_methods)
    self.loops = loops
    self.refinepotential = refinepot
    self.calc_rmsds = calcrmsds
    self.deviations = deviations
    self.energytrace = energytrace
    self.assess_trace = assess_trace

    self.loop.env.schedule_scale = physical.values(
        default=1.0, nonbond_spline=nonbond_spine)
    edat = self.loop.env.edat
    edat.contact_shell = contact_shell
    edat.dynamic_sphere = True
    edat.dynamic_lennard = False
    edat.dynamic_coulomb = False
    edat.relative_dielectric = 1.0
    edat.dynamic_modeller = True

    # Always use the schedule built by loopschedule() for loop refinement
    self.loop.library_schedule = loopschedule()
    self.rmsd_calc_initialized = False
def mk_scale(default, nonbond, spline=None):
    """Build a ``physical.values`` scaling object.

    Every term gets `default`, except soft_sphere, lennard_jones, coulomb,
    gbsa, em_density and saxs, which get `nonbond`. If `spline` is not
    None, nonbond_spline is set to it as well.
    """
    scaling = physical.values(default=default)
    nonbonded_terms = (physical.soft_sphere, physical.lennard_jones,
                       physical.coulomb, physical.gbsa, physical.em_density,
                       physical.saxs)
    for nonbonded in nonbonded_terms:
        scaling[nonbonded] = nonbond
    if spline is None:
        return scaling
    scaling[physical.nonbond_spline] = spline
    return scaling
def objfunc(self, edat=None, residue_span_range=(0,99999),
            schedule_scale=physical.values(default=1.0)):
    """Return the objective function value only (no derivatives).

    When `edat` is None, the energy data of the model's environment is
    used.
    """
    (atom_indices, model) = self.__require_indices()
    environment = model.env
    energy_data = environment.edat if edat is None else edat
    return _modeller.mod_selection_objfunc(
        model.modpt, energy_data.modpt, environment.libs.modpt,
        atom_indices, residue_span_range, schedule_scale)
def model_analysis(self, atmsel, filename, out, num):
    """Evaluate energies, write the model to `filename`, and assess it.

    The molpdf and its terms from the last energy evaluation are stored
    in ``out['molpdf']`` and ``out['pdfterms']``.
    """
    def profile_file(suffix):
        # The profile file names differ only in their file_id suffix
        return modfile.default(file_id=suffix, file_ext='',
                               root_name=self.sequence, id1=9999, id2=num)

    if not self.accelrys:
        result = atmsel.energy(output='LONG VIOLATIONS_PROFILE',
                               file=profile_file('.V'))
        (out['molpdf'], out['pdfterms']) = result
        self.user_after_single_model()
        # Written last, so that Biso contains the violations profile
        self.write(file=filename)
    else:
        # Accelrys wants the final model written before the profiles are
        # calculated, so that the Biso column contains the original
        # template-derived averages
        self.write(file=filename)
        for (suffix, normalized) in (('.E', False), ('.NE', True)):
            atmsel.energy(output='LONG ENERGY_PROFILE',
                          normalize_profile=normalized,
                          file=profile_file(suffix))
        # Request from Lisa/Azat (6/24/03): report only stereochemical
        # restraint violations, so scale every other term to zero
        # (maybe add dihedral angles?)
        stereo_only = physical.values(default=0, bond=1, angle=1,
                                      dihedral=1, improper=1, soft_sphere=1,
                                      disulfide_distance=1,
                                      disulfide_angle=1,
                                      disulfide_dihedral=1)
        for (suffix, normalized) in (('.ES', False), ('.NES', True)):
            result = atmsel.energy(output='ENERGY_PROFILE',
                                   normalize_profile=normalized,
                                   schedule_scale=stereo_only,
                                   file=profile_file(suffix))
        # Only the result of the last ('.NES') evaluation is kept
        (out['molpdf'], out['pdfterms']) = result
        self.user_after_single_model()

    # Model assessment, if any methods were requested
    self.assess(atmsel, self.assess_methods, out)
def get_energy_profile(self, edat, physical_type):
    """Return an ``EnergyProfile`` for a single physical restraint type.

    Per the original description, the profile holds a per-residue energy
    profile, the number of restraints on each residue, and the RMS
    minimum and heavy violations.
    """
    (atom_indices, model) = self.__require_indices()
    # Scale every term to zero except the requested one
    only_this_type = physical.values(default=0.)
    only_this_type[physical_type] = 1.
    return EnergyProfile(*_modeller.mod_rms_profile(
        model.modpt, edat.modpt, model.env.libs.modpt, atom_indices,
        (1, 9999), True, False, physical_type.get_type(), only_this_type))
def special_patches(self, aln):
    """Set the objective-function weights and the statistical potential.

    Assigns ``self.env.schedule_scale``, enables the dynamic_modeller
    term on ``self.env.edat`` and stores the group restraints selected by
    ``self.statistical_potential`` in ``self.group_restraints``.

    :param aln: not used by this implementation.
    :raises KeyError: if ``self.statistical_potential`` is none of
        ``"fm"``, ``"dope"``, ``"dopehr"``, ``"dfire"`` or ``None``.
    """
    # Sets the weights of the objective function.
    # NOTE(review): nonbond_spline reads `altmod_w_sp` (no leading
    # underscore) while every other weight reads `_altmod_w_*` -- confirm
    # that this attribute name is intended.
    self.env.schedule_scale = physical.values(
        default=self._altmod_w_default,
        nonbond_spline=self.altmod_w_sp,
        # Distance restraints terms.
        ca_distance=self._altmod_w_ca_distance,
        n_o_distance=self._altmod_w_n_o_distance,
        sd_mn_distance=self._altmod_w_sd_mn_distance,
        sd_sd_distance=self._altmod_w_sd_sd_distance,
    )

    # Allow calculation of statistical (dynamic_modeller) potential.
    edat = self.env.edat
    edat.contact_shell = self.sp_contact_shell
    edat.dynamic_modeller = True

    #--------------------
    # Group restraints. -
    #--------------------

    potential = self.statistical_potential
    # Read Fiser/Melo loop modeling potential
    if potential == "fm":
        gprsr = group_restraints(self.env,
                                 classes='$(LIB)/atmcls-melo.lib',
                                 parameters='$(LIB)/melo1-dist.lib')
    # Read DOPE loop modeling potential (the same one used in assess_dope).
    elif potential == "dope":
        gprsr = group_restraints(self.env,
                                 classes='$(LIB)/atmcls-mf.lib',
                                 parameters=self._dope_params_filepath)
    # Read DOPE-HR loop modeling potential
    elif potential == "dopehr":
        gprsr = group_restraints(self.env,
                                 classes='$(LIB)/atmcls-mf.lib',
                                 parameters='$(LIB)/dist-mfhr.lib')
    # DFIRE.
    elif potential == "dfire":
        gprsr = group_restraints(self.env,
                                 classes='$(LIB)/atmcls-mf.lib',
                                 parameters=self._dfire_params_filepath)
    # No statistical potential: identity test rather than `== None`
    elif potential is None:
        gprsr = None
    else:
        raise KeyError("Unknown potential: %s." % potential)
    self.group_restraints = gprsr
def pick(self, atmsel, residue_span_range=(0, 99999), restraint_sel_atoms=1,
         restraints_filter=physical.values(default=-999)):
    """Select the specified restraints for the atoms in `atmsel`.

    :raises ValueError: if `atmsel` belongs to a model other than the one
        these restraints belong to.
    """
    own_model = self.__mdl
    (atom_indices, selection_model) = atmsel.get_atom_indices()
    if selection_model is not own_model:
        raise ValueError("selection refers to a different model")
    return _modeller.mod_restraints_pick(
        own_model.modpt, atom_indices, residue_span_range,
        restraint_sel_atoms, restraints_filter)
def _dope_energy(self, gprsr, name, output='SHORT NO_REPORT',
                 residue_span_range=(1, 9999),
                 schedule_scale=physical.values(default=0.,
                                                nonbond_spline=1.),
                 **vars):
    """Internal function to do DOPE or DOPE-HR assessment.

    The model's group restraints are replaced by `gprsr` while the energy
    is evaluated and are put back afterwards, even if the evaluation
    raises.

    :param gprsr: group restraints to use for the evaluation.
    :param name: name of the potential, used only in the printed messages.
    :param vars: extra keyword arguments passed on to ``self.energy``.
    :return: the molpdf value returned by ``self.energy``.
    """
    mdl = self.__mdl
    # Single-argument print(...) is valid on Python 2 and Python 3 alike
    # and prints exactly what the old print statement did.
    print(">> Model assessment by %s potential" % name)
    edat = self.get_dope_energy_data()
    old_gprsr = mdl.group_restraints
    mdl.group_restraints = gprsr
    try:
        (molpdf, terms) = \
            self.energy(edat=edat, residue_span_range=residue_span_range,
                        output=output, schedule_scale=schedule_scale,
                        **vars)
    finally:
        # Always restore the model's original group restraints
        mdl.group_restraints = old_gprsr
    print("%s score               : %12.6f" % (name, molpdf))
    return molpdf
class environ(modobject):
    """Modeller environment (libraries etc.)"""

    #: factors for physical restraint types in scaling the schedule
    schedule_scale = physical.values(default=1.0)
    #: whether to do default NTER and CTER patching
    patch_default = True
    _rand_seed = None
    _restyp_lib_file = None
    # The residue-type library depends on the build flavour
    _default_restyp = ('$(LIB)/restyp_accelrys.lib' if info.accelrys
                       else '$(LIB)/restyp.lib')
    group_restraints = None
    io = None
    edat = None
    libs = None

    def __init__(self, rand_seed=-8123, restyp_lib_file=_default_restyp,
                 copy=None):
        """Create a new environment, or one based on `copy`.

        With `copy`, the libraries are shared with it, its io and energy
        data are copied, and its remaining instance attributes are taken
        over; `rand_seed` and `restyp_lib_file` are not used.
        """
        logger.log.write_header_once()
        self.group_restraints = None
        if not copy:
            self._rand_seed = rand_seed
            self._restyp_lib_file = restyp_lib_file
            self.libs = Libraries(self._restyp_lib_file, self._rand_seed)
            self.io = io_data()
            self.edat = energy_data()
            return

        self.libs = copy.libs
        self.io = io_data(copy=copy.io)
        self.edat = energy_data(copy=copy.edat)
        own_attributes = self.__dict__
        for (member, value) in copy.__dict__.items():
            # Names containing 'environ' and attributes that are already
            # set on this instance are left alone
            if 'environ' in member or member in own_attributes:
                continue
            own_attributes[member] = value

    def copy(self):
        """Returns a copy of this environment"""
        return environ(copy=self)

    def system(self, command):
        """Run a shell command."""
        return _modeller.mod_system(command, "")

    def dendrogram(self, matrix_file, cluster_cut):
        """Calculate a clustering tree."""
        return _modeller.mod_dendrogram(matrix_file, cluster_cut)

    def principal_components(self, matrix_file, file):
        """Principal components clustering."""
        return _modeller.mod_principal_components(matrix_file, file)

    def make_pssmdb(self, profile_list_file, pssmdb_name,
                    profile_format='TEXT', rr_file='$(LIB)/as1.sim.mat',
                    matrix_offset=0.0, matrix_scaling_factor=0.0069,
                    pssm_weights_type='HH1'):
        """Create a database of PSSMs given a list of profiles"""
        libraries = self.libs.modpt
        return _modeller.mod_pssmdb_make(
            libraries, profile_list_file, profile_format, rr_file,
            matrix_offset, matrix_scaling_factor, pssmdb_name,
            pssm_weights_type)
def __run_modeller(self, alignFile, loopRefinement):
    """Build homology models with MODELLER and collect the results.

    Parameters
    ----------
    alignFile : string
        File containing the input data (passed to MODELLER as `alnfile`)
    loopRefinement : boolean
        If `True`, perform loop refinements

    Returns
    -------
    result : list
        `(model_filename, normalized_dope_score)` tuples, one for every
        regular or loop-refined model whose output reports no failure
    loop : boolean
        `True` if at least one loop-refined model was added to `result`
    failures : list
        The `'failure'` entries of all outputs that did not succeed
    """
    log.none()  # instructs Modeller to display no log output.
    env = environ()  # create a new MODELLER environment to build this model in

    # Directories for input atom files
    env.io.atom_files_directory = [str(self.filePath.rstrip('/')), ]
    env.schedule_scale = physical.values(default=1.0, soft_sphere=0.7)

    # Selected atoms do not feel the neighborhood
    # env.edat.nonbonded_sel_atoms = 2
    env.io.hetatm = True  # read in HETATM records from template PDBs
    env.io.water = True  # read in WATER records (including waters marked as HETATMs)

    logger.debug(
        'Performing loop refinement in addition to regular modelling: {}'
        .format(loopRefinement)
    )
    if not loopRefinement:
        a = automodel(
            env,
            # alignment filename
            alnfile=str(alignFile),
            # code of the template; the parentheses the original had around
            # it did not make a tuple, so a plain string is passed
            knowns=str(self.templateID),
            # code of the target
            sequence=str(self.seqID),
            # which methods should be calculated for validation
            assess_methods=(assess.DOPE, assess.normalized_dope)
        )
    else:
        a = dope_loopmodel(
            env,
            # alignment filename
            alnfile=str(alignFile),
            # code of the template (plain string, see above)
            knowns=str(self.templateID),
            # code of the target
            sequence=str(self.seqID),
            # which methods should be calculated for validation
            assess_methods=(assess.DOPE, assess.normalized_dope),
            loop_assess_methods=(assess.DOPE, assess.normalized_dope)
        )
        # index of the first loop model
        a.loop.starting_model = self.loopStart
        # index of the last loop model
        a.loop.ending_model = self.loopEnd
        # loop refinement method
        a.loop.md_level = refine.slow

    a.starting_model = self.start  # index of the first model
    a.ending_model = self.end  # index of the last model

    # Very thorough VTFM optimization:
    a.library_schedule = autosched.slow
    a.max_var_iterations = 300

    # Thorough MD optimization:
    # a.md_level = refine.slow
    a.md_level = None

    # Repeat the whole cycle 2 times and do not stop unless obj.func. > 1E6
    # a.repeat_optimization = 2

    a.max_molpdf = 2e5

    # with helper.print_heartbeats():  # use 'long_wait' in .travis.yml
    with helper.log_print_statements(logger):
        a.make()  # do the actual homology modeling

    # The output produced by modeller is stored in a.loop.outputs or
    # a.outputs; each entry is a dictionary. Check for each "normal" model
    # and each loop model whether it was calculated successfully, and
    # collect its assessment score so that the caller can choose the best
    # model.
    result = []
    loop = False
    failures = []

    # Add the normal output
    for output in a.outputs:
        if not output['failure']:
            model_filename = output['name']
            model_dope_score = output['Normalized DOPE score']
            logger.debug(
                'Success! model_filename: {}, model_dope_score: {}'
                .format(model_filename, model_dope_score))
            result.append((model_filename, model_dope_score))
        else:
            failure = output['failure']
            logger.debug('Failure! {}'.format(failure))
            failures.append(failure)

    # Add the loop refinement output
    if loopRefinement:
        logger.debug('Modeller loop outputs:')
        for output in a.loop.outputs:
            if not output['failure']:
                model_filename = output['name']
                model_dope_score = output['Normalized DOPE score']
                logger.debug(
                    'Success! model_filename: {}, model_dope_score: {}'
                    .format(model_filename, model_dope_score))
                result.append((model_filename, model_dope_score))
                loop = True
            else:
                failure = output['failure']
                logger.debug('Failure! {}'.format(failure))
                failures.append(failure)

    # Return the successfully calculated models and a loop flag indicating
    # whether the returned models are loop refined or not
    return result, loop, failures
def _get_schedule_scale(self):
    """Return a schedule scale enabling only the two scorers' groups.

    Every term is scaled to 0 except the ``_group`` of
    ``self.pair_scorer`` and of ``self.atom_scorer``, which are set to 1.
    """
    from modeller import physical
    scale = physical.values(default=0.)
    for scorer in (self.pair_scorer, self.atom_scorer):
        scale[scorer._group] = 1.
    return scale
v[physical.nonbond_spline] = spline return v
# NOTE(review): the line above is the tail of a definition that starts
# outside this view; it is reproduced verbatim.

# In this schedule the `default` scale rises 0.01 -> 0.10 -> 0.50 -> 1.00
# with nonbond=0.0, then `nonbond` rises 0.01 -> 0.1 -> 0.5, and the last
# step uses physical.values(default=1.00) for every term.
# NOTE(review): in the `rng` tuple, 25 follows 30 although the sequence is
# otherwise increasing -- confirm whether 35 was intended.
#: thorough optimization
slow = schedule(4,
          [ step(CG, 2, mk_scale(default=0.01, nonbond=0.0)),
            step(CG, 4, mk_scale(default=0.10, nonbond=0.0)),
            step(CG, 6, mk_scale(default=0.50, nonbond=0.0)) ] + \
          [ step(CG, rng, mk_scale(default=1.00, nonbond=0.0)) for rng in \
            (8,10,14,18,20,24,30,25,40,45,50,55,60,70,80,90,100,120,140,160,200,
             250,300,400,500) ] + \
          [ step(CG, 600, mk_scale(default=1.00, nonbond=0.01)),
            step(CG, 800, mk_scale(default=1.00, nonbond=0.1)),
            step(CG, 1000, mk_scale(default=1.00, nonbond=0.5)),
            step(CG, 9999, physical.values(default=1.00)) ])

# Same shape as `slow`, with fewer full-scale steps (8 `rng` values) and
# the first nonbond step at 500 instead of 600.
#: normal optimization
normal = schedule(4,
          [ step(CG, 2, mk_scale(default=0.01, nonbond=0.0)),
            step(CG, 4, mk_scale(default=0.10, nonbond=0.0)),
            step(CG, 6, mk_scale(default=0.50, nonbond=0.0)) ] + \
          [ step(CG, rng, mk_scale(default=1.00, nonbond=0.0)) for rng in \
            (10,20,30,50,80,120,200,300) ] + \
          [ step(CG, 500, mk_scale(default=1.00, nonbond=0.01)),
            step(CG, 800, mk_scale(default=1.00, nonbond=0.1)),
            step(CG, 1000, mk_scale(default=1.00, nonbond=0.5)),
            step(CG, 9999, physical.values(default=1.00)) ])

#: fast optimization
fast = schedule(4,
def __run_modeller(self, alignFile, loopRefinement):
    """Build homology models with MODELLER and collect the results.

    Parameters
    ----------
    alignFile : string
        File containing the input data (passed to MODELLER as `alnfile`)
    loopRefinement : boolean
        If `True`, perform loop refinements

    Returns
    -------
    result : list
        `(model_filename, normalized_dope_score)` tuples, one for every
        regular or loop-refined model whose output reports no failure
    loop : boolean
        `True` if at least one loop-refined model was added to `result`
    failures : list
        The `'failure'` entries of all outputs that did not succeed
    """
    log.none()  # instructs Modeller to display no log output.
    env = environ()  # create a new MODELLER environment to build this model in

    # Directories for input atom files
    env.io.atom_files_directory = [
        str(self.filePath.rstrip("/")),
    ]
    env.schedule_scale = physical.values(default=1.0, soft_sphere=0.7)

    # Selected atoms do not feel the neighborhood
    # env.edat.nonbonded_sel_atoms = 2
    env.io.hetatm = True  # read in HETATM records from template PDBs
    env.io.water = True  # read in WATER records (including waters marked as HETATMs)

    logger.debug(
        "Performing loop refinement in addition to regular modelling: {}".
        format(loopRefinement))
    if not loopRefinement:
        a = automodel(
            env,
            # alignment filename
            alnfile=str(alignFile),
            # code of the template; the parentheses the original had around
            # it did not make a tuple, so a plain string is passed
            knowns=str(self.templateID),
            # code of the target
            sequence=str(self.seqID),
            # which methods should be calculated for validation
            assess_methods=(assess.DOPE, assess.normalized_dope),
        )
    else:
        a = dope_loopmodel(
            env,
            # alignment filename
            alnfile=str(alignFile),
            # code of the template (plain string, see above)
            knowns=str(self.templateID),
            # code of the target
            sequence=str(self.seqID),
            # which methods should be calculated for validation
            assess_methods=(assess.DOPE, assess.normalized_dope),
            loop_assess_methods=(assess.DOPE, assess.normalized_dope),
        )
        # index of the first loop model
        a.loop.starting_model = self.loopStart
        # index of the last loop model
        a.loop.ending_model = self.loopEnd
        # loop refinement method
        a.loop.md_level = refine.slow

    a.starting_model = self.start  # index of the first model
    a.ending_model = self.end  # index of the last model

    # Very thorough VTFM optimization:
    a.library_schedule = autosched.slow
    a.max_var_iterations = 300

    # Thorough MD optimization:
    # a.md_level = refine.slow
    a.md_level = None

    # Repeat the whole cycle 2 times and do not stop unless obj.func. > 1E6
    # a.repeat_optimization = 2

    a.max_molpdf = 2e5

    # with helper.print_heartbeats():  # use 'long_wait' in .travis.yml
    with helper.log_print_statements(logger):
        a.make()  # do the actual homology modeling

    # The output produced by modeller is stored in a.loop.outputs or
    # a.outputs; each entry is a dictionary. Check for each "normal" model
    # and each loop model whether it was calculated successfully, and
    # collect its assessment score so that the caller can choose the best
    # model.
    result = []
    loop = False
    failures = []

    # Add the normal output
    for output in a.outputs:
        if not output["failure"]:
            model_filename = output["name"]
            model_dope_score = output["Normalized DOPE score"]
            logger.debug(
                "Success! model_filename: {}, model_dope_score: {}".format(
                    model_filename, model_dope_score))
            result.append((model_filename, model_dope_score))
        else:
            failure = output["failure"]
            logger.debug("Failure! {}".format(failure))
            failures.append(failure)

    # Add the loop refinement output
    if loopRefinement:
        logger.debug("Modeller loop outputs:")
        for output in a.loop.outputs:
            if not output["failure"]:
                model_filename = output["name"]
                model_dope_score = output["Normalized DOPE score"]
                logger.debug(
                    "Success! model_filename: {}, model_dope_score: {}".
                    format(model_filename, model_dope_score))
                result.append((model_filename, model_dope_score))
                loop = True
            else:
                failure = output["failure"]
                logger.debug("Failure! {}".format(failure))
                failures.append(failure)

    # Return the successfully calculated models and a loop flag indicating
    # whether the returned models are loop refined or not
    return result, loop, failures