def fetchData(selection, saveDir):
    """Download the files listed under a copernicus selection into saveDir.

    Runs ``cpcc get <selection>`` to list the entries and then
    ``cpcc getf <selection>[<index>]`` for every entry that is not the literal
    string ``None``. Each result is written to
    ``<saveDir>/<name>_<index>.<extension>`` (``<name>_<index>`` when the
    listed name has no extension).

    selection -- value/array path handed to ``cpcc get`` / ``cpcc getf``
    saveDir   -- directory the files are written to; it is not created here

    Returns the list of file names (relative to saveDir) that were written.
    """
    listing = executeCommand(shlex.split("cpcc get %s" % selection))

    # the first and the two last lines of the output are not entries; slicing
    # (rather than three del statements) also copes with shorter output
    entries = listing.split("\n")[1:-2]

    files = []
    # NOTE(review): index counts every listed line, "None" entries included,
    # so that it stays aligned with the position passed to "cpcc getf" --
    # confirm against the cpcc output format
    for index, line in enumerate(entries):
        filename = line.strip().strip(",").split("/")[-1]
        # yes, unset entries show up as the string "None" in the terminal output
        if not filename or filename == "None":
            continue

        # split on the last dot so the index lands just before the extension
        stem, dot, extension = filename.rpartition(".")
        if dot:
            filename = "%s_%s.%s" % (stem, index, extension)
        else:
            filename = "%s_%s" % (filename, index)

        # append the whole name; "files += filename" would add it char by char
        files.append(filename)

        content = executeCommand(
            shlex.split("cpcc getf %s[%s]" % (selection, index)))
        with open("%s/%s" % (saveDir, filename), "w") as f:
            f.write(content)

    return files
#!/usr/bin/env python
import re
import shlex
from lib.md_tools import executeCommand
from lib.util import *

# gets tpr files from a production workflow

# switch the cpcc client to this project before querying it
executeCommand(shlex.split("cpcc cd %s" % getProjectName()))

cmd = "cpcc get mdrun.in.tpr"
res = executeCommand(shlex.split(cmd))
lines = res.split("\n")

# remove first and two last lines
lines.pop(0)
lines.pop()
lines.pop()

index = 0
regex = r"(.*)\.(.*)"

# make sure the tpr output directory exists
if not os.path.exists(ProjectDirectories.TPR_DIR):
    os.makedirs(ProjectDirectories.TPR_DIR)

# TODO use CpcUtil.fetchData()
# creates a folder named xtc, aggregates the data in a format that is expected for msmbuilder
# for easy backtracking each traj is suffixed with their project name
parser = ArgumentParser()
parser.add_argument(
    "projects",
    nargs='*',
    help="Path to projects that we want to include data from")
parser.add_argument(
    "--trajtype",
    default='tmd_backbone',
    help="Which of the processed trajectories that we wish to use (the subfolders in the analysis directory)")
args = parser.parse_args()

# output directory, created on first use
xtcDir = "XTC"
if not os.path.exists(xtcDir):
    executeCommand(["mkdir", '-p', xtcDir])

index = 0
for projectDir in args.projects:
    print(projectDir)

    # the project name is the last directory component of a path that ends in "/"
    regex = ".*/(.*)/"
    m = re.match(regex, projectDir)
    projectName = m.group(1)

    path = "%s/analysis/%s/" % (projectDir, args.trajtype)
    xtcs = "%s/*xtc" % path

    # base names of the processed trajectories, in sorted path order
    trajs = []
    for f in sorted(glob.glob(xtcs)):
        trajs.append(os.path.basename(f))

    for traj in trajs:
        src = "%s/%s" % (path, traj)
        # create dir structure for msmbuilder
def buildMDWorkflow(projectName, gromppPath, filePathList, maxCores=24, maxFiles=None, cmdLine=None):
    """Create a workflow for md simulation using grompp and mdrun function blocks.

    Every step is issued as a ``cpcc`` command through executeCommand.

    projectName  -- name passed to ``cpcc start``
    gromppPath   -- file set as ``grompp.in.mdp[+]``
    filePathList -- files set, one by one, as ``grompp.in.conf[+]``
    maxCores     -- value set on ``mdrun.in.resources[0].max.cores``
    maxFiles     -- if truthy, stop after this many entries of filePathList
    cmdLine      -- if truthy, appended to ``mdrun.in.cmdline_options``

    Returns None.
    """
    def submit(command):
        # tokenise the command line and hand it to the shared runner
        executeCommand(shlex.split(command))

    # project with one grompp and one mdrun instance
    submit("cpcc start %s" % projectName)
    submit("cpcc import gromacs")
    submit("cpcc instance gromacs::grompps grompp")
    submit("cpcc instance gromacs::mdruns mdrun")

    # everything from here up to "cpcc commit" goes into one transaction
    submit("cpcc transact")
    submit("cpcc connect grompp:out.tpr mdrun:in.tpr")

    # maximum number of cores to use per simulation. if not set simulations will be tuned
    submit("cpcc set mdrun.in.resources[0].max.cores %s" % maxCores)

    submit("cpcc setf grompp.in.top[+] topol.top")
    submit("cpcc setf grompp.in.mdp[+] %s" % gromppPath)

    for submitted, gro in enumerate(filePathList, 1):
        submit("cpcc setf grompp.in.conf[+] %s" % gro)
        # if we want to limit the number of files to submit in an easy way
        if maxFiles and submitted == maxFiles:
            break

    if cmdLine:
        submit("cpcc set mdrun.in.cmdline_options[+] %s" % cmdLine)

    submit("cpcc commit")
    submit("cpcc activate")
def buildMDWorkflowGLIC(projectName, gromppPath, filePathList, maxCores=24, maxFiles=None, cmdLine=None):
    """Create a workflow for md simulation using grompp and mdrun function blocks.

    Same sequence of ``cpcc`` commands as the plain md workflow, plus four
    include files set on ``grompp.in.include[0][0..3]``.

    projectName  -- name passed to ``cpcc start``
    gromppPath   -- file set as ``grompp.in.mdp[+]``
    filePathList -- files set, one by one, as ``grompp.in.conf[+]``
    maxCores     -- value set on ``mdrun.in.resources[0].max.cores``
    maxFiles     -- if truthy, stop after this many entries of filePathList
    cmdLine      -- if truthy, appended to ``mdrun.in.cmdline_options``

    Returns None.
    """
    def submit(command):
        # tokenise the command line and hand it to the shared runner
        executeCommand(shlex.split(command))

    # project with one grompp and one mdrun instance
    submit("cpcc start %s" % projectName)
    submit("cpcc import gromacs")
    submit("cpcc instance gromacs::grompps grompp")
    submit("cpcc instance gromacs::mdruns mdrun")

    # everything from here up to "cpcc commit" goes into one transaction
    submit("cpcc transact")
    submit("cpcc connect grompp:out.tpr mdrun:in.tpr")

    # maximum number of cores to use per simulation. if not set simulations will be tuned
    submit("cpcc set mdrun.in.resources[0].max.cores %s" % maxCores)

    submit("cpcc setf grompp.in.top[+] topol.top")
    submit("cpcc setf grompp.in.mdp[+] %s" % gromppPath)

    # this is a 2d array
    # needed for equilibrations (currently disabled):
    #   cpcc setf grompp.in.include[+][+] posre_Protein_chain_A.itp
    submit("cpcc setf grompp.in.include[0][0] topol_Protein_chain_A.itp")
    submit("cpcc setf grompp.in.include[0][1] %s"
           % os.path.join(MDToolsDirectories.OTHER, "ffnonbonded.itp"))
    submit("cpcc setf grompp.in.include[0][2] %s"
           % os.path.join(MDToolsDirectories.OTHER, "ffbonded.itp"))
    submit("cpcc setf grompp.in.include[0][3] %s"
           % os.path.join(MDToolsDirectories.POPC_FF_DIR, "popc.itp"))

    for submitted, gro in enumerate(filePathList, 1):
        submit("cpcc setf grompp.in.conf[+] %s" % gro)
        # if we want to limit the number of files to submit in an easy way
        if maxFiles and submitted == maxFiles:
            break

    if cmdLine:
        submit("cpcc set mdrun.in.cmdline_options[+] %s" % cmdLine)

    submit("cpcc commit")
    submit("cpcc activate")
#!/usr/bin/env python
import shlex
from lib.cpcUtil import CpcUtil
from lib.md_tools import executeCommand
from lib.util import *

# gets trajectory files from a production workflow

# removes the trajectories in the traj dir.
# useful since they already exist in copernicus and we only need them
# temporarily for creating analysis trajs
# NOTE(review): the "*" is only expanded if executeCommand runs the command
# through a shell -- confirm
cmd = "rm %s/*" % ProjectDirectories.TRAJ_DIR
removed = executeCommand(shlex.split(cmd))
if removed:
    print("trajectories removed")
# NOTE(review): `dir` and `regex` are defined outside this fragment --
# presumably by an enclosing loop over the mdrun directories; confirm
m = re.match(regex, dir)
number = m.group(1)

# all xtc files of the runs persisted for this directory
runs = "%s/_persistence/*run_*/*xtc" % (dir)

# keep only the non-empty trajectory files
xtc = [f for f in glob.glob(runs) if os.stat(f).st_size > 0]

if xtc:
    # concatenate the runs, in sorted path order, into one trajectory
    outfile = "%s/traj_%s.xtc" % (ProjectDirectories.TRAJ_DIR, number)
    xtc = sorted(xtc)
    cmd = ["trjcat", "-f"] + xtc + ["-o", outfile]
    executeCommand(cmd)
else:
    print("no runs found in %s" % runs)

# do a trjcat here and store it in the tpr dir
# for dir in mdruns:
#     #get the mdrun number
#     m = re.match(regex,dir)
#     number = m.group(1)
#     tpr = "%s/_persistence/run_001/topol.tpr"%(dir)
#
#
#     tprout = "topol_%s.tpr"%number
#     cmd = ["cp" ,tpr,"%s/%s"%(ProjectDirectories.TPR_DIR,tprout)]
#     executeCommand(cmd)