def init_project(p_name, sys_name=None, m_freq=None, p_freq=None,
                 platform=None, dbhost=None, w_threads=None):
    """Open the AdaptiveMD project ``p_name``, creating it if it is new.

    When a project of that name is already listed in the database it is
    returned as stored and all other arguments except ``dbhost`` are
    ignored.  Otherwise the project is initialized from
    ``configuration.cfg`` and populated with two OpenMM engines
    (``openmm-2`` and ``openmm-5``) and four PyEMMA analysis generators.

    The ``-5`` engine uses output strides scaled by 2/5 relative to the
    ``-2`` engine (presumably so both engines save frames at the same
    simulated-time spacing -- confirm against the integrator files).

    Parameters
    ----------
    p_name : str
        Name of the project.
    sys_name : str, optional
        Name of the system; selects the input directory
        ``$ADAPTIVEMD/examples/files/<sys_name>/`` and the structure file
        ``<sys_name>.pdb``.  Required for a new project.
    m_freq : int, optional
        Stride of the ``master`` (all atoms) output of ``openmm-2``.
        Required for a new project.
    p_freq : int, optional
        Stride of the ``protein`` output of ``openmm-2``.
        Required for a new project.
    platform : str, optional
        Value passed to the simulation script's ``-p`` option.
        Required for a new project.
    dbhost : optional
        If not ``None``, forwarded to ``Project.set_dbhost``.
    w_threads : int, optional
        Threads per worker; only used when ``platform == 'CPU'``.

    Returns
    -------
    Project
        The existing or freshly initialized project.

    Raises
    ------
    ValueError
        If a new project must be created but one of ``sys_name``,
        ``m_freq``, ``p_freq`` or ``platform`` is missing.
    """
    from adaptivemd import Project

    if dbhost is not None:
        Project.set_dbhost(dbhost)

    project = Project(p_name)

    if project.name in Project.list():
        print(project.name,
              "Project {0} exists, reading it from database".format(
                  project.name))
    else:
        # Fail before `project.initialize` so that a bad call cannot leave
        # a half-built project behind in the database.
        missing = [
            arg_name for arg_name, arg_value in (
                ('sys_name', sys_name),
                ('m_freq', m_freq),
                ('p_freq', p_freq),
                ('platform', platform),
            ) if arg_value is None
        ]
        if missing:
            raise ValueError(
                "Must define {0} to initialize new project {1}".format(
                    ", ".join(missing), p_name))

        from adaptivemd import File, OpenMMEngine
        from adaptivemd.analysis.pyemma import PyEMMAAnalysis

        # Current initialize sequence: everything comes from the config
        # file.  (The older sequence passed a LocalResource instead.)
        configuration_file = 'configuration.cfg'
        project.initialize(configuration_file)

        f_name = '{0}.pdb'.format(sys_name)

        # only works if filestructure is preserved as described in
        # 'jro_ntl9.ipynb' and something akin to job script in
        # 'admd_workers.pbs' is used
        f_base = 'file:///$ADAPTIVEMD/examples/files/{0}/'.format(sys_name)

        f_structure = File(f_base + f_name).load()
        f_system_2 = File(f_base + 'system-2.xml').load()
        f_integrator_2 = File(f_base + 'integrator-2.xml').load()
        f_system_5 = File(f_base + 'system-5.xml').load()
        f_integrator_5 = File(f_base + 'integrator-5.xml').load()

        sim_args = '-r -p {0}'.format(platform)

        # Only add the thread option when a count was actually given;
        # formatting None would put the literal text "None" on the
        # simulation command line.
        if platform == 'CPU' and w_threads is not None:
            print(project.name,
                  "Using CPU simulation platform with {0} threads per worker".
                  format(w_threads))
            sim_args += ' --cpu-cpu-threads {0}'.format(w_threads)

        engine_2 = OpenMMEngine(f_system_2, f_integrator_2, f_structure,
                                sim_args).named('openmm-2')
        engine_5 = OpenMMEngine(f_system_5, f_integrator_5, f_structure,
                                sim_args).named('openmm-5')

        m_freq_2 = m_freq
        p_freq_2 = p_freq
        # Floor division keeps the strides integral: with true division
        # (Python 3) `x * 2 / 5` would yield a float frame stride.
        m_freq_5 = m_freq * 2 // 5
        p_freq_5 = p_freq * 2 // 5

        engine_2.add_output_type('master', 'allatoms.dcd', stride=m_freq_2)
        engine_2.add_output_type('protein', 'protein.dcd', stride=p_freq_2,
                                 selection='protein')
        engine_5.add_output_type('master', 'allatoms.dcd', stride=m_freq_5)
        engine_5.add_output_type('protein', 'protein.dcd', stride=p_freq_5,
                                 selection='protein')

        ca_features = {'add_distances_ca': None}
        # alternative:
        # features = {'add_inverse_distances': {'select_Backbone': None}}

        ca_modeller_2 = PyEMMAAnalysis(engine_2, 'protein',
                                       ca_features).named('pyemma-ca-2')
        ca_modeller_5 = PyEMMAAnalysis(engine_5, 'protein',
                                       ca_features).named('pyemma-ca-5')

        # Selections of the K/R/H and D/E residues, restricted to atoms
        # with mass > 13.
        pos = [
            '(rescode K and mass > 13) '
            'or (rescode R and mass > 13) '
            'or (rescode H and mass > 13)'
        ]
        neg = ['(rescode D and mass > 13) '
               'or (rescode E and mass > 13)']

        ionic_features = {
            'add_distances': {'select': pos},
            'kwargs': {'indices2': {'select': neg}},
        }

        all_features = [ca_features, ionic_features]

        # Other feature sets that were tried, kept for reference:
        #   ionic without the mass filter:
        #     select  = ['rescode K or rescode R or rescode H']
        #     indices2 select = 'rescode D or rescode E'
        #   contact_features = [
        #       {'add_inverse_distances': {'select_Backbone': None}},
        #       {'add_residue_mindist': None,
        #        'kwargs': {'threshold': 0.6}},
        #   ]

        all_modeller_2 = PyEMMAAnalysis(engine_2, 'protein',
                                        all_features).named('pyemma-ionic-2')
        all_modeller_5 = PyEMMAAnalysis(engine_5, 'protein',
                                        all_features).named('pyemma-ionic-5')

        project.generators.add(ca_modeller_2)
        project.generators.add(all_modeller_2)
        project.generators.add(ca_modeller_5)
        project.generators.add(all_modeller_5)
        project.generators.add(engine_2)
        project.generators.add(engine_5)

    # Plain loop: this is done for the printing side effect only.
    for generator in project.generators:
        print(generator)

    return project
resource = LocalResource(15, 2) resource.add_path(amp.path_conda_local_jhp) elif resource_id == 'local.sheep': resource = LocalResource(15, 2) resource.add_path(amp.path_conda_local_sheep) elif resource_id == 'fub.allegro': resource = AllegroCluster(15, 4, 'big') resource.add_path(amp.path_conda_allegro_jhp) else: resource = LocalResource(1, 2) # -------------------------------------------------------------------------- # CREATE THE ENGINE # the instance to create trajectories # -------------------------------------------------------------------------- pdb_file = File('file://input.pdb') engine = OpenMMEngine(pdb_file=pdb_file, system_file=File('file://system.xml'), integrator_file=File('file://integrator.xml')) engine.args = '-r --report-interval 1 -p fastest --store-interval 1' # -------------------------------------------------------------------------- # CREATE THE CLUSTER # the instance that runs the simulations on the resource # -------------------------------------------------------------------------- cluster = MDCluster(system='alanine', resource=resource, report=report) # add the path to CONDA if now already in the default
# Script section: open the project, optionally initialize it for a known
# resource, then build the trajectory engine and the MSM modeller.
# NOTE(review): this text arrived with its line structure collapsed; the
# nesting below (resource dispatch inside the argv check, engine/modeller
# setup outside it) is a reconstruction -- confirm against the original.
project = Project(project_name)

# Initialize only when exactly two command-line arguments were supplied.
# A `resource_id` matching none of the three names leaves the project
# uninitialized (there is no `else` branch).
if len(sys.argv) == 3:
    if resource_id == 'local.jhp':
        project.initialize(LocalJHP)
    elif resource_id == 'local.sheep':
        project.initialize(LocalSheep)
    elif resource_id == 'fub.allegro':
        project.initialize(AllegroCluster)

# --------------------------------------------------------------------------
# CREATE THE ENGINE
#   the instance to create trajectories
# --------------------------------------------------------------------------

# The same PDB File object is reused below as input to the modeller.
pdb_file = File('file://files/input.pdb')

engine = OpenMMEngine(
    pdb_file=pdb_file,
    system_file=File('file://files/system.xml'),
    integrator_file=File('file://files/integrator.xml'),
    args='-r --report-interval 1 -p CPU --store-interval 1')

# --------------------------------------------------------------------------
# CREATE THE MODELLER
#   the instance to create msm models
# --------------------------------------------------------------------------

# `source_folder` is given as a path without a `file://` prefix, unlike
# the other File arguments in this section.
modeller = PyEMMAAnalysis(pdb_file=pdb_file,
                          source_folder=File('../staging_area/ntl9/trajs'))

# --------------------------------------------------------------------------
if __name__ == '__main__':
    # Script entry point: set up the 'testcase' project with one OpenMM
    # engine and one PyEMMA analysis for the alanine example files.

    project = Project('testcase')

    # ----------------------------------------------------------------------
    # RESOURCE: where the simulations are run
    # ----------------------------------------------------------------------
    local_resource = LocalResource('$HOME/miniconda2/bin')
    project.initialize(local_resource)

    # ----------------------------------------------------------------------
    # ENGINE: the generator that produces trajectories
    # ----------------------------------------------------------------------
    pdb_file = File('file://../files/alanine/alanine.pdb')
    pdb_file = pdb_file.named('initial_pdb')

    system_xml = File('file://../files/alanine/system.xml')
    integrator_xml = File('file://../files/alanine/integrator.xml')

    engine = OpenMMEngine(
        pdb_file=pdb_file,
        system_file=system_xml,
        integrator_file=integrator_xml,
        args='-r --report-interval 1 -p CPU --store-interval 1',
    )
    engine = engine.named('openmm')

    # ----------------------------------------------------------------------
    # ANALYZER: the generator that computes an MSM from the trajectories
    # ----------------------------------------------------------------------
    modeller = PyEMMAAnalysis(pdb_file=pdb_file)
    modeller = modeller.named('pyemma')
def init_project(p_name, sys_name=None, m_freq=None, p_freq=None,
                 platform=None, reinitialize=False):
    """Open the AdaptiveMD project ``p_name``, creating it if it is new.

    The database URL is taken from the ``ADMD_DBURL`` environment variable
    when it is set and non-empty.  If the project is already listed in the
    database it is returned as stored; otherwise it is initialized from
    ``configuration.cfg`` and populated with two OpenMM engines
    (``openmm-2`` and ``openmm-5``) and six PyEMMA analysis generators
    (C-alpha distances, C-alpha plus ionic distances, and inverse C-alpha
    distances, one of each per engine).

    The ``-5`` engine uses output strides scaled by 2/5 relative to the
    ``-2`` engine (presumably so both engines save frames at the same
    simulated-time spacing -- confirm against the integrator files).

    Parameters
    ----------
    p_name : str
        Name of the project.
    sys_name : str, optional
        Name of the system; selects the input directory
        ``$ADMD_FILES/<sys_name>/`` and the structure file
        ``<sys_name>.pdb``.  Required for a new project.
    m_freq : int, optional
        Stride of the ``master`` (all atoms) output of ``openmm-2``.
        Required for a new project.
    p_freq : int, optional
        Stride of the ``protein`` output of ``openmm-2``.
        Required for a new project.
    platform : str, optional
        Value passed to the simulation script's ``-p`` option.
        Required for a new project.
    reinitialize : bool, optional
        If true, ``Project.delete(p_name)`` is called first, so the
        project is rebuilt from the given parameters.

    Returns
    -------
    Project
        The existing or freshly initialized project.

    Raises
    ------
    ValueError
        If a new project must be created and any of ``sys_name``,
        ``m_freq``, ``p_freq`` or ``platform`` is missing or falsy.
    """
    from adaptivemd import Project

    # A missing or empty variable means "keep the default database URL".
    dburl = os.environ.get("ADMD_DBURL")
    if dburl:
        logger.info("Set ADMD_DBURL to: " + dburl)
        Project.set_dburl(dburl)

    if reinitialize:
        logger.info(
            "Project {0} exists, deleting it from database to reinialize".
            format(p_name))
        Project.delete(p_name)

    if p_name in Project.list():
        logger.info(
            "Project {0} exists, reading it from database".format(p_name))
        project = Project(p_name)

    elif not all([sys_name, m_freq, p_freq, platform]):
        # Checked before the Project object is created so that a bad call
        # does not leave anything behind.
        raise ValueError(
            "Must define all parameters [{0}] to initialize new project\nHave: {1}"
            .format("sys_name,m_freq,p_freq,platform",
                    repr([sys_name, m_freq, p_freq, platform])))

    else:
        project = Project(p_name)

        from adaptivemd import File, OpenMMEngine
        from adaptivemd.analysis.pyemma import PyEMMAAnalysis

        # Initialize w/ config file: 1 of multiple options
        # TODO add config filename argument
        configuration_file = 'configuration.cfg'
        project.initialize(configuration_file)

        f_name = '{0}.pdb'.format(sys_name)

        # FIXME add system specifications to configuration file
        f_base = 'file:///$ADMD_FILES/{0}/'.format(sys_name)

        f_structure = File(f_base + f_name).load()
        f_system_2 = File(f_base + 'system-2.xml').load()
        f_integrator_2 = File(f_base + 'integrator-2.xml').load()
        f_system_5 = File(f_base + 'system-5.xml').load()
        f_integrator_5 = File(f_base + 'integrator-5.xml').load()

        sim_args = '-r -p {0}'.format(platform)

        engine_2 = OpenMMEngine(f_system_2, f_integrator_2, f_structure,
                                sim_args).named('openmm-2')
        engine_5 = OpenMMEngine(f_system_5, f_integrator_5, f_structure,
                                sim_args).named('openmm-5')

        # FIXME this is dumb and hard for user to deal with
        # TODO engine selection by name
        m_freq_2 = m_freq
        p_freq_2 = p_freq
        # Floor division keeps the strides integral: with true division
        # (Python 3) `x * 2 / 5` would yield a float frame stride.
        m_freq_5 = m_freq * 2 // 5
        p_freq_5 = p_freq * 2 // 5

        engine_2.add_output_type('master', 'allatoms.dcd', stride=m_freq_2)
        engine_2.add_output_type('protein', 'protein.dcd', stride=p_freq_2,
                                 selection='protein')
        engine_5.add_output_type('master', 'allatoms.dcd', stride=m_freq_5)
        engine_5.add_output_type('protein', 'protein.dcd', stride=p_freq_5,
                                 selection='protein')

        ca_features = {'add_distances_ca': None}
        # alternative:
        # features = {'add_inverse_distances': {'select_Backbone': None}}

        ca_modeller_2 = PyEMMAAnalysis(engine_2, 'protein',
                                       ca_features).named('pyemma-ca-2')
        ca_modeller_5 = PyEMMAAnalysis(engine_5, 'protein',
                                       ca_features).named('pyemma-ca-5')

        # Selections of the K/R/H and D/E residues, restricted to atoms
        # with mass > 13.
        pos = [
            '(rescode K and mass > 13) '
            'or (rescode R and mass > 13) '
            'or (rescode H and mass > 13)'
        ]
        neg = ['(rescode D and mass > 13) '
               'or (rescode E and mass > 13)']

        ionic_features = {
            'add_distances': {'select': pos},
            'kwargs': {'indices2': {'select': neg}},
        }

        all_features = [ca_features, ionic_features]

        inv_ca_features = {'add_inverse_distances': {'select_Ca': None}}

        # Other feature sets that were tried, kept for reference:
        #   ionic without the mass filter:
        #     select  = ['rescode K or rescode R or rescode H']
        #     indices2 select = 'rescode D or rescode E'
        #   contact_features = [
        #       {'add_inverse_distances': {'select_Backbone': None}},
        #       {'add_residue_mindist': None,
        #        'kwargs': {'threshold': 0.6}},
        #   ]

        all_modeller_2 = PyEMMAAnalysis(engine_2, 'protein',
                                        all_features).named('pyemma-ionic-2')
        all_modeller_5 = PyEMMAAnalysis(engine_5, 'protein',
                                        all_features).named('pyemma-ionic-5')

        inv_modeller_2 = PyEMMAAnalysis(
            engine_2, 'protein', inv_ca_features).named('pyemma-invca-2')
        inv_modeller_5 = PyEMMAAnalysis(
            engine_5, 'protein', inv_ca_features).named('pyemma-invca-5')

        project.generators.add(ca_modeller_2)
        project.generators.add(all_modeller_2)
        project.generators.add(inv_modeller_2)
        project.generators.add(ca_modeller_5)
        project.generators.add(all_modeller_5)
        project.generators.add(inv_modeller_5)
        project.generators.add(engine_2)
        project.generators.add(engine_5)

    return project
def test(self):
    """Drive one adaptive round through the project's event loop.

    Registers an OpenMM engine and a PyEMMA modeller, schedules a
    generator-based strategy (one seed trajectory, then per round a batch
    of trajectories followed by a model task), runs the project and checks
    the number of stored trajectories.
    """
    base = self.f_base

    # ------------------------------------------------------------------
    # Engine: the generator that produces trajectories
    # ------------------------------------------------------------------
    pdb_file = File('file://{0}alanine.pdb'.format(base))
    pdb_file = pdb_file.named('initial_pdb').load()
    system_file = File('file://{0}system.xml'.format(base)).load()
    integrator_file = File('file://{0}integrator.xml'.format(base)).load()

    engine = OpenMMEngine(
        pdb_file=pdb_file,
        system_file=system_file,
        integrator_file=integrator_file,
        args='-r --report-interval 1 -p CPU --store-interval 1 -v',
    )
    engine = engine.named('openmm')

    # ------------------------------------------------------------------
    # Analyzer: the generator that computes an MSM from the trajectories
    # ------------------------------------------------------------------
    modeller = PyEMMAAnalysis(engine=engine).named('pyemma')

    for generator in (engine, modeller):
        self.project.generators.add(generator)

    def adaptive_rounds(n_rounds=1, batch_size=1, n_frames=1):
        # Seed trajectory started from the input structure.
        seed = self.project.new_trajectory(frame=pdb_file, length=n_frames)
        seed_task = engine.run(seed)
        self.project.queue(seed_task)
        yield seed_task.is_done

        for _ in range(n_rounds):
            # Submit a batch of trajectory tasks.
            batch = self.project.new_ml_trajectory(
                engine=engine, length=n_frames, number=batch_size)
            md_tasks = tuple(engine.run(item) for item in batch)
            self.project.queue(md_tasks)
            print("queued %s tasks" % len(md_tasks))

            # Continue once ALL of them are done (they may have failed).
            yield [md_task.is_done for md_task in md_tasks]

            # Submit a model job over every trajectory stored so far.
            model_task = modeller.execute(list(self.project.trajectories))
            self.project.queue(model_task)
            print("queued modeller task")

            # The next round starts when the model task is done.
            yield model_task.is_done

    # TODO worker/MD running in subprocess thread horribly slow
    #      - can it be made to run a bit faster?
    n_loops = 1
    trajs_per_loop = 1
    expected_count = n_loops * trajs_per_loop

    event = adaptive_rounds(n_rounds=n_loops, batch_size=trajs_per_loop)
    self.project.add_event(event)
    self.project.run()
    self.project.wait_until(self.project.on_ntraj(expected_count))

    stored = list(self.project.trajectories)
    self.assertEqual(len(stored), expected_count)
    self.project.close()
def test(self):
    """Run and then extend one trajectory through an in-process worker.

    A `WorkerScheduler` is stepped by hand instead of starting a worker
    process.  The test checks that exactly one trajectory is stored, that
    its output file holds ``traj_len + 1`` frames, and that after
    extending the task by 10 the same file holds ``traj_len + 10 + 1``
    frames.
    """
    base = self.f_base

    # ------------------------------------------------------------------
    # Engine: the generator that produces trajectories
    # ------------------------------------------------------------------
    pdb_file = File('file://{0}alanine.pdb'.format(base))
    pdb_file = pdb_file.named('initial_pdb').load()
    system_file = File('file://{0}system.xml'.format(base)).load()
    integrator_file = File('file://{0}integrator.xml'.format(base)).load()

    engine = OpenMMEngine(
        pdb_file=pdb_file,
        system_file=system_file,
        integrator_file=integrator_file,
        args='-r --report-interval 1 -p CPU --store-interval 1',
    )
    engine = engine.named('openmm')

    # ------------------------------------------------------------------
    # Analyzer: the generator that computes an MSM from the trajectories
    # ------------------------------------------------------------------
    modeller = PyEMMAAnalysis(engine=engine).named('pyemma')

    for generator in (engine, modeller):
        self.project.generators.add(generator)

    # ------------------------------------------------------------------
    # One short trajectory, executed directly rather than queued
    # ------------------------------------------------------------------
    n_frames = 1
    extra_frames = 10

    first_traj = self.project.new_trajectory(engine['pdb_file'], n_frames,
                                             engine)
    run_task = engine.run(first_traj)
    # self.project.queue(task)

    topology = md.load('{0}alanine.pdb'.format(base))

    # this part fakes a running worker without starting the worker process
    scheduler = WorkerScheduler(self.project.configuration, verbose=True)
    scheduler.enter(self.project)

    def run_to_completion(pending):
        # Step the scheduler until the given task reports it is done.
        while not pending.is_done():
            scheduler.advance()

    scheduler.submit(run_task)
    self.assertEqual(len(self.project.trajectories), 0)
    run_to_completion(run_task)

    try:
        assert len(self.project.trajectories) == 1
    except AssertionError:
        # Show the task's output before re-raising.
        print("stderr from worker task: \n%s" % run_task.stderr)
        print("stdout from worker task: \n%s" % run_task.stdout)
        raise

    print("stdout of worker:\n%s" % run_task.stdout)

    # FIXME: the worker space is cleared, so the trajectory paths are not
    # valid anymore.
    # traj_path = os.path.join(
    #     worker.path,
    #     'workers',
    #     'worker.' + hex(task.__uuid__),
    #     worker.replace_prefix(self.project.trajectories.one.url)
    # )
    # this is a workaround, but assumes that sandbox:// lives on the same fs.
    stored_dir = self.project.trajectories.one.dirname[1:]
    traj_path = os.path.join(self.shared_path, stored_dir, 'output.dcd')
    assert os.path.exists(traj_path), traj_path

    # The file must hold the requested frames plus one more.
    frames = md.load(traj_path, top=topology)
    assert len(frames) == n_frames + 1, len(frames)

    # Extend the same trajectory by 10.
    extend_task = run_task.extend(extra_frames)
    scheduler.submit(extend_task)
    run_to_completion(extend_task)

    # Still one stored trajectory, since the existing one was extended.
    assert len(self.project.trajectories) == 1

    frames = md.load(traj_path, top=topology)
    self.assertEqual(len(frames), n_frames + extra_frames + 1)

    self.project.close()