def merge_mdp(inp):
    """Produce a merged .mdp settings file as the 'mdp' output."""
    # With no inputs we are only being asked whether the command can run.
    if inp.testing():
        return
    output = inp.getFunctionOutput()
    # procSettings writes the effective .mdp file into the output directory.
    generated_mdp = procSettings(inp, inp.getOutputDir())
    output.setOut('mdp', FileValue(generated_mdp))
    return output
def pdb2gmx(inp):
    """Run pdb2gmx to generate a GROMACS topology and conformation.

    Inputs: 'conf' (pdb file), 'ff' (force field name), 'water' (water
    model), optional 'input_choices' (text fed to pdb2gmx's interactive
    prompts) and 'cmdline_options' (extra arguments, split shell-style).
    Outputs: 'conf' (conf.gro), 'top' (topol.top) and the 'include' array
    of generated .itp files.
    Raises GromacsError if pdb2gmx exits non-zero.
    """
    cmdnames = cmds.GromacsCommands()
    if inp.testing():
        # if there are no inputs, we're testing whether the command can run
        cpc.util.plugin.testCommand("%s -version" % cmdnames.pdb2gmx)
        return
    input_choices = inp.getInput('input_choices')
    if input_choices is None:
        input_choices = ''
    pdbfile = inp.getInput('conf')
    forcefield = inp.getInput('ff')
    watermodel = inp.getInput('water')
    skip_hydrogens = True  # default to -ignh
    if inp.getInput('cmdline_options') is not None:
        cmdlineOpts = shlex.split(inp.getInput('cmdline_options'))
    else:
        cmdlineOpts = []
    # BUGFIX: the command name is a string and must be split into argv
    # items (as every other call site does); appending a list to the
    # bare string raised TypeError.
    cmdline = cmdnames.pdb2gmx.split()
    cmdline += ["-f", pdbfile, "-ff", forcefield, "-water", watermodel]
    if skip_hydrogens:
        cmdline.extend(["-ignh"])
    cmdline.extend(cmdlineOpts)
    proc = subprocess.Popen(cmdline,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            cwd=inp.getOutputDir(),
                            close_fds=True)
    # Interactive choices (if any) go to pdb2gmx's stdin.
    (stdout, stderr) = proc.communicate(input_choices)
    if proc.returncode != 0:
        raise GromacsError("ERROR: pdb2gmx returned %s" % (stdout))
    fo = inp.getFunctionOutput()
    fo.setOut('conf', FileValue(os.path.join(inp.getOutputDir(), 'conf.gro')))
    fo.setOut('top', FileValue(os.path.join(inp.getOutputDir(), 'topol.top')))
    # Export any generated .itp include files, both as a whole array and
    # as individual array members.
    itpfiles = glob.glob(os.path.join(inp.getOutputDir(), '*.itp'))
    fo.setOut('include', itpfiles)
    for i, itpfile in enumerate(itpfiles):
        fo.setOut('include[%d]' % (i), itpfile)
    return fo
def tune_fn(inp):
    """Pre-process the system with grompp and tune run resources.

    Writes the effective .mdp file, copies the topology and any include
    files into the output dir, runs grompp to build topol.tpr, then lets
    tune.tune() derive suitable run resources from the resulting .tpr.
    Outputs: 'mdp' (FileValue) and 'resources'.
    """
    cmdnames = cmds.GromacsCommands()
    if inp.testing():
        # if there are no inputs, we're testing whether the command can run
        #cpc.util.plugin.testCommand("grompp -version")
        #cpc.util.plugin.testCommand("mdrun -version")
        return
    fo = inp.getFunctionOutput()
    persDir = inp.getPersistentDir()
    # procSettings writes the effective .mdp file into the output dir.
    mdpfile = procSettings(inp, inp.getOutputDir())
    # copy the topology and include files
    topfile = os.path.join(inp.getOutputDir(), 'topol.top')
    shutil.copy(inp.getInput('top'), topfile)
    incl = inp.getInput('include')
    if incl is not None and len(incl) > 0:
        for i in range(len(incl)):
            filename = inp.getInput('include[%d]' % i)
            if filename is not None:
                # same name, but in one directory.
                nname = os.path.join(inp.getOutputDir(),
                                     os.path.split(filename)[1])
                shutil.copy(filename, nname)
    # and execute grompp
    cmdlist = cmdnames.grompp.split()
    cmdlist += ["-f", mdpfile,
                "-quiet",
                "-c", inp.getInput('conf'),
                "-p", 'topol.top',  # we made sure it's there
                "-o", "topol.tpr"]
    if inp.hasInput('ndx'):
        cmdlist.append('-n')
        cmdlist.append(inp.getInput('ndx'))
    proc = subprocess.Popen(cmdlist,
                            stdin=None,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            cwd=inp.getOutputDir())
    (stdo, stde) = proc.communicate(None)
    if proc.returncode != 0:
        #raise GromacsError("Error running grompp: %s"%
        #                   (open(stdoutfn,'r').read()))
        # Report the failure through the function output instead of raising.
        # NOTE: stde is always None here because stderr is merged into stdout.
        fo.setError("Error running grompp: %s, %s" % (stdo, stde))
        return fo
    rsrc = Resources()
    # Derive suitable run resources from the generated tpr file.
    tune.tune(rsrc, inp.getInput('conf'),
              os.path.join(inp.getOutputDir(), 'topol.tpr'), persDir)
    fo.setOut('mdp', FileValue(mdpfile))
    fo.setOut('resources', rsrc.setOutputValue())
    return fo
def g_energy(inp):
    """Run g_energy and parse one energy term from its output.

    Inputs: 'edr' (energy file) and 'item' (the energy term name, fed to
    g_energy's interactive prompt). Outputs: 'xvg' plus the parsed
    'average', 'error', 'rmsd', 'drift' and 'unit' of the term.
    Raises GromacsError if g_energy fails or the item is not found.
    """
    cmdnames = cmds.GromacsCommands()
    if inp.testing():
        # if there are no inputs, we're testing whether the command can run
        cpc.util.plugin.testCommand("%s -version" % cmdnames.g_energy)
        return
    edrfile = inp.getInput('edr')
    item = inp.getInput('item')
    outDir = inp.getOutputDir()
    xvgoutname = os.path.join(outDir, "energy.xvg")
    cmdlist = cmdnames.g_energy.split() + ["-f", edrfile, "-o", xvgoutname]
    proc = subprocess.Popen(cmdlist,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            cwd=inp.getOutputDir(),
                            close_fds=True)
    # The item name is g_energy's interactive term selection.
    (stdout, stderr) = proc.communicate(item)
    if proc.returncode != 0:
        raise GromacsError("ERROR: g_energy returned %s" % (stdout))
    regitem = re.compile(r'^%s' % (item))
    regsplit = re.compile(r'---------------')
    splitmatch = False
    # BUGFIX: foundmatch must be initialized; previously a missing item
    # raised NameError on the check below instead of the intended
    # GromacsError.
    foundmatch = False
    for line in stdout.splitlines():
        if not splitmatch:
            # Skip everything up to the '------' separator line that
            # precedes the statistics table.
            if regsplit.match(line):
                splitmatch = True
        else:
            if regitem.match(line):
                foundmatch = True
                # Table columns: name, average, error, rmsd, drift, unit.
                sp = line.split()
                av = float(sp[1])
                err = float(sp[2])
                rmsd = float(sp[3])
                drift = float(sp[4])
                unit = sp[5]
                break
    if not foundmatch:
        raise GromacsError(
            "ERROR: couldn't find match for energy item %s in output." % item)
    fo = inp.getFunctionOutput()
    fo.setOut('xvg', FileValue(xvgoutname))
    fo.setOut('average', FloatValue(av))
    fo.setOut('error', FloatValue(err))
    fo.setOut('rmsd', FloatValue(rmsd))
    fo.setOut('drift', FloatValue(drift))
    fo.setOut('unit', StringValue(unit))
    return fo
def _eneconv(inp, fo):
    """Internal implementation of eneconv.

    Concatenates/converts the 'edr_files' inputs into a single
    'fixed.edr' with eneconv, honoring the optional time window, dt,
    offset, scale factor and free-form command-line options. Sets the
    'edr' output on fo.
    """
    cmdnames = cmds.GromacsCommands()
    if inp.testing():
        # if there are no inputs, we're testing whether the command can run
        cpc.util.plugin.testCommand("%s -version" % cmdnames.eneconv)
        return
    # Persistence guards against re-running when no relevant input changed.
    pers=cpc.dataflow.Persistence(os.path.join(inp.getPersistentDir(),
                                               "persistent.dat"))
    if pers.get('init') is None:
        init=True
        pers.set('init', 1)
    else:
        inpItems=[ 'edr_files', 'scalefac', 'dt', 'offset', 'cmdline_options' ]
        if not checkUpdated(inp, inpItems):
            # nothing to do
            return
        init=False
    writeStdin=StringIO()
    edrFilesList=inp.getInput('edr_files')
    outDir=inp.getOutputDir()
    edrOutname=os.path.join(outDir, "fixed.edr")
    #cmdline=["eneconv", '-f', edrFiles, '-o', edrOutname]
    cmdline = cmdnames.eneconv.split() + ['-f']
    for i in xrange(len(edrFilesList)):
        cmdline.append(inp.getInput('edr_files[%d]' % i))
    cmdline.extend(['-o', edrOutname])
    # Optional time window / sampling / scaling parameters.
    first_frame_ps=inp.getInput('first_frame_ps')
    if first_frame_ps is not None:
        cmdline.extend(['-b', "%g"%first_frame_ps] )
    last_frame_ps=inp.getInput('last_frame_ps')
    if last_frame_ps is not None:
        cmdline.extend(['-e', "%g"%last_frame_ps] )
    dt=inp.getInput('dt')
    if dt is not None:
        cmdline.extend(['-dt', "%g"%dt] )
    offset=inp.getInput('offset')
    if offset is not None:
        cmdline.extend(['-offset', "%g"%offset] )
    scaleF=inp.getInput('scalefac')
    if scaleF is not None:
        cmdline.extend(['-scalefac', "%g"%scaleF] )
    # Free-form extra options, split shell-style.
    if inp.getInput('cmdline_options') is not None:
        cmdlineOpts=shlex.split(inp.getInput('cmdline_options'))
    else:
        cmdlineOpts=[]
    cmdline.extend(cmdlineOpts)
    log.debug(cmdline)
    proc=subprocess.Popen(cmdline,
                          stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT,
                          cwd=inp.getOutputDir(),
                          close_fds=True)
    # writeStdin is currently always empty; kept for symmetry with the
    # other wrappers that feed interactive selections.
    (stdout, stderr)=proc.communicate(writeStdin.getvalue())
    if proc.returncode != 0:
        raise GromacsError("ERROR: eneconv returned %s"%(stdout))
    fo.setOut('edr', FileValue(edrOutname))
    pers.write()
def _trjconv(inp, fo, split):
    """Internal implementation of trjconv and trjconv_split.

    Builds a trjconv command line from the inputs, feeds the interactive
    group selections through stdin, and sets either a single 'xtc' output
    (split=False) or a 'confs' array of out%d.gro files (split=True).
    Raises GromacsError on trjconv failure or if both 'fit' and 'center'
    are given.
    """
    cmdnames = cmds.GromacsCommands()
    if inp.testing():
        # if there are no inputs, we're testing whether the command can run
        cpc.util.plugin.testCommand("%s -version" % cmdnames.trjconv)
        return
    # Persistence guards against re-running when no relevant input changed.
    pers = cpc.dataflow.Persistence(os.path.join(inp.getPersistentDir(),
                                                 "persistent.dat"))
    if pers.get('init') is None:
        init = True
        pers.set('init', 1)
    else:
        inpItems = ['traj', 'tpr', 'ndx', 'dt', 'skip', 'dump', 'pbc', 'ur',
                    'center', 'fit', 'fit_type', 'cmdline_options']
        if not checkUpdated(inp, inpItems):
            return
        init = False
    writeStdin = StringIO()
    trajfile = inp.getInput('traj')
    tprfile = inp.getInput('tpr')
    outDir = inp.getOutputDir()
    xtcoutname = os.path.join(outDir, "trajout.xtc")
    grooutname = os.path.join(outDir, "out.gro")
    cmdline = cmdnames.trjconv.split() + ['-s', tprfile, '-f', trajfile]
    if not split:
        cmdline.extend(['-o', xtcoutname])
    else:
        # -sep writes one numbered .gro file per frame.
        cmdline.extend(['-sep', '-o', grooutname])
    ndxfile = inp.getInput('ndx')
    if ndxfile is not None:
        cmdline.extend(['-n', ndxfile])
    first_frame_ps = inp.getInput('first_frame_ps')
    if first_frame_ps is not None:
        cmdline.extend(['-b', "%g" % first_frame_ps])
    last_frame_ps = inp.getInput('last_frame_ps')
    if last_frame_ps is not None:
        # BUGFIX: the end time must be passed with -e; this previously
        # passed -b a second time, clobbering first_frame_ps instead of
        # limiting the last frame (cf. _eneconv, which uses -e).
        cmdline.extend(['-e', "%g" % last_frame_ps])
    dt = inp.getInput('dt')
    if dt is not None:
        cmdline.extend(['-dt', "%g" % dt])
    skip = inp.getInput('skip')
    if skip is not None:
        cmdline.extend(['-skip', "%d" % skip])
    dump = inp.getInput('dump')
    if dump is not None:
        cmdline.extend(['-dump', "%g" % dump])
    pbc = inp.getInput('pbc')
    if pbc is not None:
        cmdline.extend(['-pbc', pbc])
    ur = inp.getInput('ur')
    if ur is not None:
        cmdline.extend(['-ur', ur])
    center = inp.getInput('center')
    if center is not None:
        # The centering group is answered interactively on stdin.
        cmdline.extend(['-center'])
        writeStdin.write("%s\n" % center)
    fit = inp.getInput('fit')
    fit_type = inp.getInput('fit_type')
    if fit is not None:
        if center is not None:
            raise GromacsError('Both fit and center set')
        if fit_type is None:
            fit_type = 'rot+trans'
        cmdline.extend(['-fit', fit_type])
        writeStdin.write("%s\n" % fit)
    # Free-form extra options, split shell-style.
    if inp.getInput('cmdline_options') is not None:
        cmdlineOpts = shlex.split(inp.getInput('cmdline_options'))
    else:
        cmdlineOpts = []
    cmdline.extend(cmdlineOpts)
    log.debug(cmdline)
    # The output group selection is the last interactive answer.
    outputGroup = inp.getInput('output_group')
    if outputGroup is not None:
        writeStdin.write("%s\n" % outputGroup)
    else:
        writeStdin.write("System\n")
    proc = subprocess.Popen(cmdline,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            cwd=inp.getOutputDir(),
                            close_fds=True)
    (stdout, stderr) = proc.communicate(writeStdin.getvalue())
    if proc.returncode != 0:
        raise GromacsError("ERROR: trjconv returned %s" % (stdout))
    if not split:
        fo.setOut('xtc', FileValue(xtcoutname))
    else:
        # iterate as long as there are files with name 'out%d.gro' for
        # increasing i
        i = 0
        while True:
            filename = os.path.join(outDir, 'out%d.gro' % i)
            if not os.path.exists(filename):
                break
            fo.setOut('confs[%d]' % i, FileValue(filename))
            i += 1
    pers.write()
def createMacroStates(self): ''' Build a macro-state MSM ''' # Again we redirect output #stdoutfn=os.path.join(self.inp.getOutputDir(), 'msm_stdout_macro.txt') #stderrfn=os.path.join(self.inp.getOutputDir(), 'msm_stderr_macro.txt') #old_stdout = sys.stdout #sys.stdout=open(stdoutfn,'w') #old_stderr = sys.stderr #sys.stderr=open(stderrfn,'w') Map = msmbuilder.MSMLib.PCCA(self.T, self.num_macro) Assignments = self.assignments Assignments = Map[Assignments] NumStates = max(Assignments.flatten()) + 1 sys.stderr.write("Calculating macrostates with lag time %g.\n" % self.lag_time) # Now repeat any calculations with the new assignments Counts = msmbuilder.MSMLib.GetCountMatrixFromAssignments( Assignments, self.num_macro, LagTime=self.lag_time, Slide=True) #PK want reversible MLE estimator again here sys.stderr.write("Recalculating assignments & trimming again.\n") CountsAfterTrimming, Mapping = msmbuilder.MSMLib.ErgodicTrim(Counts) msmbuilder.MSMLib.ApplyMappingToAssignments(Assignments, Mapping) ReversibleCounts = msmbuilder.MSMLib.IterativeDetailedBalance( CountsAfterTrimming, Prior=0) TC = msmbuilder.MSMLib.EstimateTransitionMatrix(ReversibleCounts) Populations = numpy.array(ReversibleCounts.sum(0)).flatten() Populations /= Populations.sum() # Again, get the most populated state X0 = array((Counts + Counts.transpose()).sum(0)).flatten() X0 = X0 / sum(X0) MaxState = argmax(X0) tcoutf = os.path.join(self.inp.getOutputDir(), "tc.dat") if scipy.sparse.issparse(TC): scipy.savetxt(tcoutf, TC.todense()) else: numpy.savetxt(tcoutf, TC, fmt="%12.6g") self.out.setOut('macro_transition_counts', FileValue(tcoutf)) woutf = os.path.join(self.inp.getOutputDir(), "weights.dat") numpy.savetxt(woutf, X0, fmt="%12.6g") self.out.setOut('macro_weights', FileValue(woutf)) # Do adaptive sampling on the macrostates nstates = int(self.num_macro * self.num_to_start) sys.stderr.write("Adaptive sampling to %d=%d*%d states.\n" % (nstates, self.num_macro, self.num_to_start)) Proj = self.Proj 
StartStates = Proj.AdaptiveSampling(Counts.toarray(), nstates) #print StartStates #PK note JustGetIndices gives indices into original conformations RandomConfs = Proj.GetRandomConfsFromEachState(Assignments, NumStates, 1, JustGetIndices=True) self.newRuns = [] self.macroConfs = [] for k, v in StartStates.items(): num_started = 0 for i in xrange(NumStates): if i == k: trajnum = RandomConfs[i][0][0] frame_nr = RandomConfs[i][0][1] lh5name = Proj.GetTrajFilename(trajnum) trajdata = self.trajData[lh5name] trajname = trajdata.xtc time = frame_nr * trajdata.dt #* self.nstxtcout #time = frame_nr * self.dt *self.nstxtcout #trajname = Proj.GetTrajFilename(trajnum) #trajname = trajname.replace('.nopbc.lh5','.xtc') first = True # Use trjconv to write new starting confs while (num_started < self.num_to_start): sys.stderr.write("Writing new start confs.\n") outfn = os.path.join( self.inp.getOutputDir(), 'macro%d-%d.gro' % (i, num_started)) args = self.cmdnames.trjconv.split() args += [ "-f", "%s" % trajname, "-s", self.tprfile, "-o", outfn, "-pbc", "mol", "-dump", "%d" % time ] proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=sys.stdout, stderr=sys.stderr) proc.communicate('0') num_started = num_started + 1 self.newRuns.append(outfn) if first: self.macroConfs.append(outfn) first = False # now set the macro state outputs: i = 0 for fname in self.macroConfs: self.out.setOut('macro_conf[%d]' % i, cpc.dataflow.FileValue(fname)) i += 1
def createMicroStates(self):
    '''Build a micro-state MSM.

    Clusters the project trajectories into self.num_micro microstates,
    assigns frames and trims non-ergodic states, estimates a reversible
    transition matrix (stored in self.T / self.assignments), computes
    implied time scales (with a best-effort plot), writes a pdb of the
    most populated state, and finally generates evenly sampled starting
    configurations in self.newRuns.
    '''
    sys.stderr.write("Creating msm project, ref_conf=%s.\n" %
                     str(self.ref_conf))
    # Create the msm project from the reference conformation
    #TODO IMAN provide weighting here
    Proj = CreateCopernicusProject(self.ref_conf, self.filelist)
    self.Proj = Proj
    C1 = Conformation.Conformation.LoadFromPDB(self.ref_conf)
    # Automate the clustering to only CA or backbone atoms
    # TODO: fix this
    a = C1["AtomNames"]
    AtomIndices = where((a == "N") | (a == "C") | (a == "CA") |
                        (a == "O"))[0]
    sys.stderr.write("Cluster project.\n")
    # Do msm-stuff
    GenF = os.path.join('Data', 'Gens.nopbc.h5')
    AssF = os.path.join('Data', 'Ass.nopbc.h5')
    AssFTrimmed = os.path.join('Data', 'Assignment-trimmed.nopbc.h5')
    RmsF = os.path.join('Data', 'RMSD.nopbc.h5')
    Generators = Proj.ClusterProject(AtomIndices=AtomIndices,
                                     NumGen=self.num_micro, Stride=30)
    sys.stderr.write("Assign project.\n")
    Assignments, RMSD, WhichTrajs = Proj.AssignProject(
        Generators, AtomIndices=AtomIndices)
    # Overwrite any stale data files from a previous pass.
    if os.path.exists(GenF):
        os.remove(GenF)
    Generators.SaveToHDF(GenF)
    if os.path.exists(AssF):
        os.remove(AssF)
    msmbuilder.Serializer.SaveData(AssF, Assignments)
    if os.path.exists(RmsF):
        os.remove(RmsF)
    msmbuilder.Serializer.SaveData(RmsF, RMSD)
    sys.stderr.write("Trim data.\n")
    # Trim data
    Counts = msmbuilder.MSMLib.GetCountMatrixFromAssignments(
        Assignments, self.num_micro, LagTime=1, Slide=True)
    # Get the most populated state
    sys.stderr.write("Get the most populated state.\n")
    X0 = array((Counts + Counts.transpose()).sum(0)).flatten()
    X0 = X0 / sum(X0)
    MaxState = argmax(X0)
    ## Calculate only times up to at maximum half the
    ## length of an individual trajectory
    max_time = self.avgtime / 2.
    #max_time = ((self.dt * self.nstep / 1000)*0.5)
    ## SP this is almost certainly wrong:
    #if max_time > 1:
    #    max_time=int(max_time)
    #else:
    #    max_time=2
    ###max_time = 300 # hard-coded for villin
    self.max_time = max_time
    # More trimming
    # PK want ErgodicTrim instead of EnforceMetastability
    # This is from BuildMSM script
    sys.stderr.write("More trimming...\n")
    CountsAfterTrimming, Mapping = msmbuilder.MSMLib.ErgodicTrim(Counts)
    msmbuilder.MSMLib.ApplyMappingToAssignments(Assignments, Mapping)
    ReversibleCounts = msmbuilder.MSMLib.IterativeDetailedBalance(
        CountsAfterTrimming, Prior=0)
    TC = msmbuilder.MSMLib.EstimateTransitionMatrix(ReversibleCounts)
    Populations = numpy.array(ReversibleCounts.sum(0)).flatten()
    Populations /= Populations.sum()
    # Keep the trimmed model for createMacroStates().
    self.assignments = Assignments
    self.T = TC
    NumStates = max(Assignments.flatten()) + 1
    sys.stderr.write("New number of states=%d\n" % NumStates)
    if os.path.exists(AssFTrimmed):
        os.remove(AssFTrimmed)
    msmbuilder.Serializer.SaveData(AssFTrimmed, Assignments)
    sys.stderr.write("Calculating implied time scales..\n")
    # Calculate the implied time-scales
    # NOTE: 'time' shadows the time module within this method.
    time = numpy.arange(1, max_time + 1, 1)
    TS = msmbuilder.MSMLib.GetImpliedTimescales(
        AssFTrimmed, NumStates, time, NumImpliedTimes=len(time) + 1)
    sys.stderr.write("TS=%s, time=%s\n" % (str(TS), time))
    # Plotting is best-effort: failures are logged but do not abort the run.
    try:
        plt.scatter(TS[:, 0], TS[:, 1])
        plt.title('Lag times versus implied time scale')
        plt.xlabel('Lag Time (assignment-steps)')
        plt.ylabel('Implied Timescale (ps)')
        plt.yscale('log')
        timescalefn = os.path.join(self.inp.getOutputDir(),
                                   'msm_timescales.png')
        sys.stderr.write('Writing timescale plot to %s' % timescalefn)
        try:
            plt.savefig(timescalefn)
        except:
            fo = StringIO()
            traceback.print_exception(sys.exc_info()[0], sys.exc_info()[1],
                                      sys.exc_info()[2], file=fo)
            errmsg = "Run error generating timescale plot: %s\n" % (
                fo.getvalue())
            sys.stderr.write(errmsg)
        plt.close()
        self.out.setOut('timescales', FileValue(timescalefn))
    except ValueError as e:
        fo = StringIO()
        traceback.print_exception(sys.exc_info()[0], sys.exc_info()[1],
                                  sys.exc_info()[2], file=fo)
        errmsg = "Run error generating timescale plot: %s\n" % (
            fo.getvalue())
        sys.stderr.write(errmsg)
    # Get random confs from each state
    sys.stderr.write("Getting random configuration from each state..\n")
    RandomConfs = Proj.GetRandomConfsFromEachState(Assignments, NumStates, 1,
                                                   JustGetIndices=True)
    # Compute the MaxState with the new assignments (ie. after trimming)
    sys.stderr.write("Computing MaxState.\n")
    Counts = msmbuilder.MSMLib.GetCountMatrixFromAssignments(Assignments,
                                                             NumStates,
                                                             LagTime=1,
                                                             Slide=True)
    X0 = array((Counts + Counts.transpose()).sum(0)).flatten()
    X0 = X0 / sum(X0)
    MaxState = argmax(X0)
    # Create a tpr-file for trjconv with -pbc mol
    #sys.stderr.write("making randomconfs.\n")
    #try:
    #    os.mkdir('RandomConfs')
    #except:
    #    pass
    # we need a tpr file to be able to trjconv random confs later
    #proc = subprocess.Popen(["grompp","-f","%s"%self.mdpfile,
    #                         "-c","%s"%self.grofile[0],
    #                         "-p", "%s"%self.topfile,"-o",
    #                         "%s"%os.path.join(self.inp.getOutputDir(),
    #                                           'topol.tpr')],
    #                        stdin=None,stdout=sys.stdout, stderr=sys.stdout)
    #proc.communicate(None)
    # we pick one of the tpr files.
    self.tprfile = self.inp.getInput('trajectories[0].tpr')
    # Set a flag to indicate if we have written the maxstate.pdb-file
    have_maxstate = 0
    for i in xrange(NumStates):
        # (trajectory index, frame index) of a random conf in state i.
        traj_num = RandomConfs[i][0][0]
        frame_nr = RandomConfs[i][0][1]
        lh5name = Proj.GetTrajFilename(traj_num)
        #sys.stderr.write("trajectory name=%s\n"%lh5name)
        trajdata = self.trajData[lh5name]
        trajname = trajdata.xtc
        #trajname = trajname.replace('.nopbc.lh5','.xtc')
        time = frame_nr * trajdata.dt #* self.nstxtcout
        #if(i<10*self.num_to_start):
        #proc = subprocess.Popen(["trjconv","-f","%s"%trajname,
        #    "-s","%s"%os.path.join(self.inp.getOutputDir(),'topol.tpr'),
        #    "-o",os.path.join(self.inp.getOutputDir(),'micro%d.gro'%i),
        #    "-pbc","mol","-dump","%d"%time], stdin=subprocess.PIPE,
        #    stdout=sys.stdout, stderr=sys.stderr)
        #proc.communicate("0")
        # Write out a pdb of the most populated state
        if (i == MaxState and have_maxstate == 0):
            maxstatefn = os.path.join(self.inp.getOutputDir(),
                                      'maxstate.pdb')
            sys.stderr.write("writing out pdb of most populated state.\n")
            args = self.cmdnames.trjconv.split()
            args += ["-f", trajname, "-s", self.tprfile, "-o", maxstatefn,
                     "-pbc", "mol", "-dump", "%d" % time]
            if self.ndx is not None:
                args.extend(["-n", self.ndx])
            proc = subprocess.Popen(args, stdin=subprocess.PIPE,
                                    stdout=sys.stdout, stderr=sys.stderr)
            # The output group name is answered on trjconv's stdin.
            proc.communicate(self.grpname)
            self.out.setOut('maxstate', FileValue(maxstatefn))
            have_maxstate = 1
    # now evenly sample configurations and put them in the array
    # newRuns. If we're later assigning macrostates, we'll overwrite them
    # with adaptive sampling configurations
    self.newRuns = []
    for j in xrange(self.num_to_start * self.num_macro):
        # pick a cluster at random:
        # NOTE(review): i is a float here; indexing RandomConfs with it
        # relies on (deprecated) float-index handling — consider int(...).
        i = random.random() * int(NumStates)
        traj_num = RandomConfs[i][0][0]
        frame_nr = RandomConfs[i][0][1]
        lh5name = Proj.GetTrajFilename(traj_num)
        trajdata = self.trajData[lh5name]
        trajname = trajdata.xtc
        time = frame_nr * trajdata.dt
        #maxstatefn=os.path.join(self.inp.getOutputDir(), '.conf')
        outfn = os.path.join(self.inp.getOutputDir(),
                             'new_run_%d.gro' % (j))
        args = self.cmdnames.trjconv.split()
        args += ["-f", "%s" % trajname, "-s", self.tprfile, "-o", outfn,
                 "-pbc", "mol", "-dump", "%d" % time]
        sys.stderr.write("writing out new run %s .\n" % outfn)
        proc = subprocess.Popen(args, stdin=subprocess.PIPE,
                                stdout=sys.stdout, stderr=sys.stderr)
        # '0' selects the default (System) output group.
        proc.communicate('0')
        self.newRuns.append(outfn)
def extractData(confout, outDir, persDir, fo):
    """Concatenate all output data from the partial runs into the end results.

    Scans the run_??? subdirectories of persDir for partial trajectory
    (xtc/trr), energy (edr) and log/stdout/stderr files, concatenates
    each kind (trjcat/eneconv for the binary formats, plain appends for
    the text files) into outDir, and sets the corresponding outputs on fo.
    """
    cmdnames = cmds.GromacsCommands()
    #outputs=dict()
    # Concatenate stuff
    confoutPath = os.path.join(outDir, "confout.gro")
    # The first partial run's final conformation is the overall result.
    shutil.copy(confout[0], confoutPath)
    #outputs['conf'] = Value(confoutPath,
    #                        inp.function.getOutput('conf').getType())
    fo.setOut('conf', FileValue(confoutPath))
    # fix the xtc files
    xtcso = sorted(glob.glob(os.path.join(persDir, "run_???", "traj.*xtc")))
    # cull empty files and duplicate trajectory names
    xtcs = []
    xtcbase = []
    try:
        for file in xtcso:
            st = os.stat(file)
            base = os.path.split(file)[1]
            if st.st_size > 0:
                if base not in xtcbase:
                    xtcs.append(file)
                    xtcbase.append(base)
                else:
                    # there already was a file with this name. Overwrite
                    # it because mdrun wasn't aware of it when writing.
                    ind = xtcbase.index(base)
                    xtcs[ind] = file
    except OSError:
        # a file disappeared while scanning; use what we have so far
        pass
    # concatenate them
    xtcoutname = os.path.join(outDir, "traj.xtc")
    if len(xtcs) > 0:
        cmd = cmdnames.trjcat.split() + ["-f"]
        cmd.extend(xtcs)
        cmd.extend(["-o", xtcoutname])
        stdo = open(os.path.join(persDir, "trjcat_xtc.out"), "w")
        sp = subprocess.Popen(cmd, stdout=stdo, stderr=subprocess.STDOUT)
        sp.communicate(None)
        stdo.close()
        fo.setOut('xtc', FileValue(xtcoutname))
    # do the trrs
    trrso = sorted(glob.glob(os.path.join(persDir, "run_???", "traj.*trr")))
    # cull empty files and duplicate trajectory names
    trrs = []
    trrbase = []
    try:
        for file in trrso:
            st = os.stat(file)
            base = os.path.split(file)[1]
            if st.st_size > 0:
                if base not in trrbase:
                    trrs.append(file)
                    trrbase.append(base)
                else:
                    # there already was a file with this name. Overwrite
                    # it because mdrun wasn't aware of it when writing.
                    ind = trrbase.index(base)
                    trrs[ind] = file
    except OSError:
        pass
    # concatenate them
    trroutname = os.path.join(outDir, "traj.trr")
    if len(trrs) > 0:
        cmd = cmdnames.trjcat.split() + ["-f"]
        cmd.extend(trrs)
        cmd.extend(["-o", trroutname])
        stdo = open(os.path.join(persDir, "trjcat_trr.out"), "w")
        sp = subprocess.Popen(cmd, stdout=stdo, stderr=subprocess.STDOUT)
        sp.communicate(None)
        stdo.close()
        fo.setOut('trr', FileValue(trroutname))
    # and the edrs
    edrso = glob.glob(os.path.join(persDir, "run_???", "ener.*edr"))
    # cull empty files and duplicate trajectory names
    edrs = []
    edrbase = []
    try:
        for file in edrso:
            st = os.stat(file)
            base = os.path.split(file)[1]
            if st.st_size > 0:
                if base not in edrbase:
                    edrs.append(file)
                    edrbase.append(base)
                else:
                    # there already was a file with this name. Overwrite
                    # it because mdrun wasn't aware of it when writing.
                    ind = edrbase.index(base)
                    log.debug("Overwriting existing edr file %s with %s" %
                              (edrs[ind], file))
                    edrs[ind] = file
    except OSError:
        pass
    edroutname = os.path.join(outDir, "ener.edr")
    if len(edrs) > 1:
        log.debug("Concatenating edr files: %s" % edrs)
    # concatenate them
    if len(edrs) > 0:
        cmd = cmdnames.eneconv.split() + ["-f"]
        cmd.extend(edrs)
        cmd.extend(["-o", edroutname])
        stdo = open(os.path.join(persDir, "eneconv.out"), "w")
        sp = subprocess.Popen(cmd, stdout=stdo, stderr=subprocess.STDOUT)
        sp.communicate(None)
        stdo.close()
        log.debug("Setting edr output to %s" % edroutname)
        fo.setOut('edr', FileValue(edroutname))
    # do the stdout
    stdouto = glob.glob(os.path.join(persDir, "run_???", "stdout"))
    stdoutname = os.path.join(outDir, "stdout")
    outf = open(stdoutname, "w")
    for infile in stdouto:
        inf = open(infile, "r")
        outf.write(inf.read())
        inf.close()
    # append a timestamp so the combined stdout records when it was made
    outf.write("%s\n" % time.strftime("%a, %d %b %Y %H:%M:%S"))
    outf.write("%f\n" % time.time())
    outf.close()
    fo.setOut('stdout', FileValue(stdoutname))
    # do the stderr
    stderro = glob.glob(os.path.join(persDir, "run_???", "stderr"))
    stderrname = os.path.join(outDir, "stderr")
    outf = open(stderrname, "w")
    for infile in stderro:
        inf = open(infile, "r")
        outf.write(inf.read())
        inf.close()
    outf.close()
    fo.setOut('stderr', FileValue(stderrname))
    # and do md.log
    logo = glob.glob(os.path.join(persDir, "run_???", "md.*log"))
    logname = os.path.join(outDir, "md.log")
    outf = open(logname, "w")
    for infile in logo:
        inf = open(infile, "r")
        outf.write(inf.read())
        inf.close()
    outf.close()
    fo.setOut('log', FileValue(logname))
    log.debug("Returning without command.")
    log.debug("fo.cmds=%s" % str(fo.cmds))
def extractData(confout, outDir, persDir, fo):
    """Concatenate all output data from the partial runs into the end results.

    Variant for checkpoint-continued runs: the partial files carry
    '.part*' names (traj.part*.xtc etc.). Concatenates trajectories and
    energies, appends the text logs, and also collects the PLUMED
    COLVAR/HILLS/bias.dat outputs. Sets the corresponding outputs on fo.
    """
    cmdnames = cmds.GromacsCommands()
    # Concatenate stuff
    confoutPath = os.path.join(outDir, "confout.gro")
    # The first partial run's final conformation is the overall result.
    shutil.copy(confout[0], confoutPath)
    fo.setOut('conf', FileValue(confoutPath))
    # fix the xtc files
    xtcso = sorted(glob.glob(os.path.join(persDir, "run_???",
                                          "traj.part*.xtc")))
    # cull empty files and duplicate trajectory names
    xtcs = []
    xtcbase = []
    for file in xtcso:
        st = os.stat(file)
        base = os.path.split(file)[1]
        if st.st_size > 0:
            if base not in xtcbase:
                xtcs.append(file)
                xtcbase.append(base)
            else:
                # there already was a file with this name. Overwrite
                # it because mdrun wasn't aware of it when writing.
                ind = xtcbase.index(base)
                xtcs[ind] = file
    # concatenate them
    xtcoutname = os.path.join(outDir, "traj.xtc")
    if len(xtcs) > 0:
        cmd = cmdnames.trjcat.split() + ["-f"]
        cmd.extend(xtcs)
        cmd.extend(["-o", xtcoutname])
        stdo = open(os.path.join(persDir, "trjcat_xtc.out"), "w")
        sp = subprocess.Popen(cmd, stdout=stdo, stderr=subprocess.STDOUT)
        sp.communicate(None)
        stdo.close()
        fo.setOut('xtc', FileValue(xtcoutname))
    # do the trrs
    trrso = sorted(glob.glob(os.path.join(persDir, "run_???",
                                          "traj.part*.trr")))
    # cull empty files and duplicate trajectory names
    trrs = []
    trrbase = []
    for file in trrso:
        st = os.stat(file)
        base = os.path.split(file)[1]
        if st.st_size > 0:
            if base not in trrbase:
                trrs.append(file)
                trrbase.append(base)
            else:
                # there already was a file with this name. Overwrite
                # it because mdrun wasn't aware of it when writing.
                ind = trrbase.index(base)
                trrs[ind] = file
    # concatenate them
    trroutname = os.path.join(outDir, "traj.trr")
    if len(trrs) > 0:
        # BUGFIX: trjcat is a command string and must be split into argv
        # items (as in the xtc branch above); string + list raised
        # TypeError here.
        cmd = cmdnames.trjcat.split() + ["-f"]
        cmd.extend(trrs)
        cmd.extend(["-o", trroutname])
        stdo = open(os.path.join(persDir, "trjcat_trr.out"), "w")
        sp = subprocess.Popen(cmd, stdout=stdo, stderr=subprocess.STDOUT)
        sp.communicate(None)
        stdo.close()
        fo.setOut('trr', FileValue(trroutname))
    # and the edrs
    edrso = glob.glob(os.path.join(persDir, "run_???", "ener.part*.edr"))
    # cull empty files and duplicate trajectory names
    edrs = []
    edrbase = []
    for file in edrso:
        st = os.stat(file)
        base = os.path.split(file)[1]
        if st.st_size > 0:
            if base not in edrbase:
                edrs.append(file)
                edrbase.append(base)
            else:
                # there already was a file with this name. Overwrite
                # it because mdrun wasn't aware of it when writing.
                ind = edrbase.index(base)
                edrs[ind] = file
    edroutname = os.path.join(outDir, "ener.edr")
    # concatenate them
    if len(edrs) > 0:
        cmd = cmdnames.eneconv.split() + ["-f"]
        cmd.extend(edrs)
        cmd.extend(["-o", edroutname])
        stdo = open(os.path.join(persDir, "eneconv.out"), "w")
        sp = subprocess.Popen(cmd, stdout=stdo, stderr=subprocess.STDOUT)
        sp.communicate(None)
        stdo.close()
        fo.setOut('edr', FileValue(edroutname))
    # do the stdout
    stdouto = glob.glob(os.path.join(persDir, "run_???", "stdout"))
    stdoutname = os.path.join(outDir, "stdout")
    outf = open(stdoutname, "w")
    for infile in stdouto:
        inf = open(infile, "r")
        outf.write(inf.read())
        inf.close()
    # append a timestamp so the combined stdout records when it was made
    outf.write("%s\n" % time.strftime("%a, %d %b %Y %H:%M:%S"))
    outf.write("%f\n" % time.time())
    outf.close()
    fo.setOut('stdout', FileValue(stdoutname))
    # do the stderr
    stderro = glob.glob(os.path.join(persDir, "run_???", "stderr"))
    stderrname = os.path.join(outDir, "stderr")
    outf = open(stderrname, "w")
    for infile in stderro:
        inf = open(infile, "r")
        outf.write(inf.read())
        inf.close()
    outf.close()
    fo.setOut('stderr', FileValue(stderrname))
    log.debug("Returning without command.")
    log.debug("fo.cmds=%s" % str(fo.cmds))
    # do the COLVAR file
    colvaro = glob.glob(os.path.join(persDir, "run_???", "COLVAR"))
    colvarname = os.path.join(outDir, "COLVAR")
    outf = open(colvarname, 'w')
    for cvfile in colvaro:
        inf = open(cvfile, 'r')
        outf.write(inf.read())
        inf.close()
    outf.close()
    fo.setOut('COLVAR', FileValue(colvarname))
    # take the last HILLS file and the bias.dat file
    hillso = glob.glob(os.path.join(persDir, "run_???", "HILLS"))
    if len(hillso) > 0:
        hillsname = os.path.join(outDir, "HILLS")
        outf = open(hillsname, 'w')
        inf = open(hillso[-1], 'r')
        outf.write(inf.read())
        inf.close()
        log.debug("Set the HILLS outfile")
        fo.setOut('HILLS', FileValue(hillsname))
    biaso = glob.glob(os.path.join(persDir, "run_???", "bias.dat"))
    if len(biaso) > 0:
        biasname = os.path.join(outDir, "bias.dat")
        outf = open(biasname, 'w')
        inf = open(biaso[-1], 'r')
        outf.write(inf.read())
        inf.close()
        fo.setOut('bias', FileValue(biasname))
def grompp(inp):
    """Run grompp to pre-process the system into a topol.tpr run input.

    Skips the work when no relevant input changed since the last run
    (tracked via a persistence file). Copies the topology and include
    files into the output dir, runs grompp, and sets the 'stdout' and
    'tpr' outputs. Raises GromacsError if grompp exits non-zero.
    """
    cmdnames = cmds.GromacsCommands()
    if inp.testing():
        # if there are no inputs, we're testing whether the command can run
        cpc.util.plugin.testCommand("%s -version" % cmdnames.grompp)
        return
    #log.debug("base dir=%s"%inp.getBaseDir())
    #log.debug("output dir=%s"%inp.getOutputDir())
    #log.debug("persistent dir=%s"%inp.getPersistentDir())
    pers = cpc.dataflow.Persistence(
        os.path.join(inp.getPersistentDir(), "persistent.dat"))
    fo = inp.getFunctionOutput()
    # If nothing relevant changed and we already ran once, return the
    # (empty) function output without re-running grompp.
    if not (inp.getInputValue('conf').isUpdated() or
            inp.getInputValue('top').isUpdated() or
            inp.getInputValue('include').isUpdated() or
            inp.getInputValue('settings').isUpdated() or
            inp.getInputValue('ndx').isUpdated()):
        if pers.get('init') is not None:
            return fo
    if pers.get('init') is not None:
        # Log which inputs triggered the re-run.
        log.debug("conf: %s" % (inp.getInputValue('conf').isUpdated()))
        log.debug("top: %s" % (inp.getInputValue('top').isUpdated()))
        log.debug("include: %s" % (inp.getInputValue('include').isUpdated()))
        log.debug("settings: %s" % (inp.getInputValue('settings').isUpdated()))
        log.debug("ndx: %s" % (inp.getInputValue('ndx').isUpdated()))
    pers.set('init', 1)
    # procSettings writes the effective .mdp file into the output dir.
    mdpfile = procSettings(inp, inp.getOutputDir())
    # copy the topology and include files
    topfile = os.path.join(inp.getOutputDir(), 'topol.top')
    shutil.copy(inp.getInput('top'), topfile)
    incl = inp.getInput('include')
    if incl is not None and len(incl) > 0:
        for i in range(len(incl)):
            filename = inp.getInput('include[%d]' % i)
            if filename is not None:
                # same name, but in one directory.
                nname = os.path.join(inp.getOutputDir(),
                                     os.path.split(filename)[1])
                shutil.copy(filename, nname)
    # and execute grompp
    cmdlist = cmdnames.grompp.split() + ["-f", mdpfile,
                                         "-quiet",
                                         "-c", inp.getInput('conf'),
                                         "-p", 'topol.top',  # we made sure it's there
                                         "-o", "topol.tpr"]
    if inp.hasInput('ndx'):
        cmdlist.append('-n')
        cmdlist.append(inp.getInput('ndx'))
    # TODO: symlink all the auxiliary files into the run dir
    # grompp's output is captured to a 'stdout' file, prefixed with a
    # timestamp.
    stdoutfn = os.path.join(inp.getOutputDir(), "stdout")
    stdoutf = open(stdoutfn, "w")
    stdoutf.write("%s\n" % time.strftime("%a, %d %b %Y %H:%M:%S"))
    stdoutf.write("%f\n" % time.time())
    #stdoutf=open(os.path.join(inp.getOutputDir(), "stderr"),"w")
    proc = subprocess.Popen(cmdlist,
                            stdin=None,
                            stdout=stdoutf,
                            stderr=subprocess.STDOUT,
                            cwd=inp.getOutputDir())
    proc.communicate(None)
    stdoutf.close()
    if proc.returncode != 0:
        raise GromacsError("Error running grompp: %s" %
                           (open(stdoutfn, 'r').read()))
    fo.setOut('stdout', FileValue(stdoutfn))
    fo.setOut('tpr', FileValue(os.path.join(inp.getOutputDir(),
                                            "topol.tpr")))
    pers.write()
    return fo