def _init(self):
    """ Creates the epoch 1 input directories by copying the generator folders.

    The subdirectories of ``self.generatorspath`` (or the folder itself when it has none) act as generators.
    ``self.nmax`` copies are spread over them: every generator receives the same base number of copies and the
    remainder is handed to randomly drawn generators, at most one extra copy each. The copies are written into
    ``self.inputpath`` as ``e1s<k>_<name>`` directories, with `k` counting up from 1.

    Raises
    ------
    NameError
        If ``self.inputpath`` already contains entries matching ``e1s*``.
    """
    from natsort import natsorted

    # The extra '' appends a finishing separator to the glob pattern
    generators = natsorted(glob(path.join(self.generatorspath, '*', '')))
    if not generators:
        logger.info('Generators folder has no subdirectories, using folder itself')
        generators.append(self.generatorspath)

    numGen = len(generators)
    baseCopies = int(np.floor(self.nmax / numGen))
    copiesPerGen = np.ones(numGen, dtype=int) * baseCopies
    remainder = np.mod(self.nmax, numGen)
    # Draw, without replacement, the generators which receive one of the leftover copies
    luckyGens = np.random.choice(numGen, remainder, replace=False)
    copiesPerGen[luckyGens] += 1

    if not path.exists(self.inputpath):
        makedirs(self.inputpath)

    # Refuse to continue if epoch 1 directories already exist in the input folder
    if len(glob(path.join(self.inputpath, 'e1s*'))) != 0:
        raise NameError('Epoch 1 directories already exist.')

    # One entry per simulation to create, grouped by generator in sorted order
    sources = [gen for gen, ncopies in zip(generators, copiesPerGen) for _ in range(ncopies)]
    for simNum, src in enumerate(sources, start=1):
        target = path.join(self.inputpath, 'e1s' + str(simNum) + '_' + _simName(src))
        copytree(src, target, symlinks=True, ignore=ignore_patterns(*_IGNORE_EXTENSIONS))
def _writeInputsFunction(i, f, epoch, inputpath, coorname):
    """ Creates the input directory of one new simulation starting from a selected frame.

    The input folder of the simulation which produced the frame is copied to
    ``<inputpath>/e<epoch>s<i+1>_<name>p<piece>f<frame>/`` and the selected frame is written into it as
    `coorname`.

    Parameters
    ----------
    i : int
        Zero-based index of the new simulation; the directory name uses ``i + 1``.
    f : object
        Frame selection exposing ``sim``, ``piece`` and ``frame`` attributes.
    epoch : int
        Epoch number used in the name of the new directory.
    inputpath : str
        Folder in which the new simulation directory is created.
    coorname : str
        Name of the coordinate file written inside the new directory.

    Raises
    ------
    NameError
        If the simulation has no input folder.
    """
    # Raw string: '\d' inside a plain string literal is an invalid escape sequence
    regex = re.compile(r'(e\d+s\d+)_')
    frameNum = f.frame
    piece = f.piece
    # Use the parent simulation when there is one
    currSim = f.sim if f.sim.parent is None else f.sim.parent

    traj = currSim.trajectory[piece]
    if currSim.input is None:
        raise NameError('Could not find input folder in simulation lists. Cannot create new simulations.')

    wuName = _simName(traj)
    res = regex.search(wuName)
    if res:  # If we are running on top of adaptive, use the first name part for the next sim name
        wuName = res.group(1)

    # create new job directory
    newName = 'e' + str(epoch) + 's' + str(i + 1) + '_' + wuName + 'p' + str(piece) + 'f' + str(frameNum)
    newDir = path.join(inputpath, newName, '')

    # copy previous input directory including input files, leaving out coordinate/restart/output files
    copytree(currSim.input, newDir, symlinks=False,
             ignore=ignore_patterns('*.coor', '*.rst', '*.out', *_IGNORE_EXTENSIONS))

    # overwrite the coordinate file with the selected frame (frameNum is passed as is, no index shift)
    mol = Molecule(currSim.molfile)  # Always read the mol file, otherwise it does not work if we need to save a PDB as coorname
    mol.read(traj)
    mol.dropFrames(keep=frameNum)  # Making sure only specific frame to write is kept
    mol.write(path.join(newDir, coorname))
def _writeInputsFunction(i, f, epoch, inputpath):
    """ Creates the input directory of one new simulation starting from a selected frame.

    The input folder of the simulation which produced the frame is copied to
    ``<inputpath>/e<epoch>s<i+1>_<name>p<piece>f<frame>/`` and the selected frame is written into it as
    ``input.coor``.

    Parameters
    ----------
    i : int
        Zero-based index of the new simulation; the directory name uses ``i + 1``.
    f : object
        Frame selection exposing ``sim``, ``piece`` and ``frame`` attributes.
    epoch : int
        Epoch number used in the name of the new directory.
    inputpath : str
        Folder in which the new simulation directory is created.

    Raises
    ------
    NameError
        If the simulation has no input folder.
    """
    # Raw string: '\d' inside a plain string literal is an invalid escape sequence
    regex = re.compile(r'(e\d+s\d+)_')
    frameNum = f.frame
    piece = f.piece
    # Use the parent simulation when there is one
    currSim = f.sim if f.sim.parent is None else f.sim.parent

    traj = currSim.trajectory[piece]
    if currSim.input is None:
        raise NameError(
            'Could not find input folder in simulation lists. Cannot create new simulations.'
        )

    wuName = _simName(traj)
    res = regex.search(wuName)
    if res:  # If we are running on top of adaptive, use the first name part for the next sim name
        wuName = res.group(1)

    # create new job directory
    newName = 'e' + str(epoch) + 's' + str(i + 1) + '_' + wuName + 'p' + str(piece) + 'f' + str(frameNum)
    newDir = path.join(inputpath, newName, '')

    # copy previous input directory including input files
    # NOTE(review): ignore_patterns takes each pattern as a separate argument; if _TRAJ_EXTENSIONS_COOR is a
    # tuple/list of patterns it must be unpacked with * -- confirm against its definition
    copytree(currSim.input, newDir, symlinks=False, ignore=ignore_patterns(_TRAJ_EXTENSIONS_COOR))

    # overwrite the coordinate file with the selected frame (frameNum is passed as is, no index shift)
    mol = Molecule()
    mol.read(traj)
    mol.frame = frameNum
    mol.write(path.join(newDir, 'input.coor'))
def _writeInputs(self, simsframes, epoch=None):
    """ Creates the input directories of a new epoch from the selected simulation frames.

    For every element of `simsframes` the input folder of its simulation is copied (skipping ``*.dcd``,
    ``*.xtc`` and ``*.coor`` files) to ``<self.inputpath>/e<epoch>s<index>_<name>p<piece>f<frame>/`` and the
    selected frame is written there as ``input.coor``. The source trajectory and frame of every new
    simulation are recorded in ``e<epoch>_writeinputs.log`` inside ``self.inputpath``.

    Parameters
    ----------
    simsframes : iterable
        Frame selections exposing ``sim``, ``piece`` and ``frame`` attributes.
    epoch : int, optional
        Epoch to create. If None, ``self._getEpoch() + 1`` is used.

    Raises
    ------
    NameError
        If entries of the epoch or its log file already exist, or if a simulation has no input folder.
    """
    if epoch is None:
        epoch = self._getEpoch() + 1

    prefix = 'e' + str(epoch)
    # NOTE(review): this pattern also matches the epoch log file and any epoch whose number starts with the
    # same digits (e.g. 'e1*' matches 'e10s1_...')
    if len(glob(path.join(self.inputpath, prefix + '*'))) != 0:
        raise NameError('Input dirs of epoch ' + str(epoch) + ' already exists.')

    logfile = path.join(self.inputpath, prefix + '_writeinputs.log')
    if path.exists(logfile):
        raise NameError('Epoch logfile already exists. Cant overwrite it.')

    # Raw string: '\d' inside a plain string literal is an invalid escape sequence
    regex = re.compile(r'(e\d+s\d+)_')

    # The context manager closes the log even when one of the simulations below raises
    with open(logfile, 'w') as fid:
        for i, f in enumerate(simsframes):
            frameNum = f.frame
            piece = f.piece
            # Use the parent simulation when there is one
            currSim = f.sim if f.sim.parent is None else f.sim.parent

            traj = currSim.trajectory[piece]
            if currSim.input is None:
                raise NameError(
                    'Could not find input folder in simulation lists. Cannot create new simulations.'
                )

            wuName = _simName(traj)
            res = regex.search(wuName)
            if res:  # If we are running on top of adaptive, use the first name part for the next sim name
                wuName = res.group(1)

            # create new job directory
            newName = prefix + 's' + str(i + 1) + '_' + wuName + 'p' + str(piece) + 'f' + str(frameNum)
            newDir = path.join(self.inputpath, newName, '')

            # copy previous input directory including input files
            copytree(currSim.input, newDir, symlinks=False, ignore=ignore_patterns('*.dcd', '*.xtc', '*.coor'))

            # overwrite the coordinate file with the selected frame (frameNum is passed as is, no index shift)
            mol = Molecule()
            mol.read(traj)
            mol.frame = frameNum
            mol.write(path.join(newDir, 'input.coor'))

            # record which trajectory and frame the new simulation starts from
            fid.write('# {0} \n{1} {2}\n'.format(newName, traj, frameNum))
def _writeInputs(self, simsframes, epoch=None):
    """ Writes the input directories and the log file of a new epoch.

    Each element of `simsframes` yields one new directory
    ``<self.inputpath>/e<epoch>s<index>_<name>p<piece>f<frame>/``: a copy of the input folder of the
    originating simulation (without ``*.dcd``, ``*.xtc`` and ``*.coor`` files) plus the selected frame saved
    as ``input.coor``. For each new directory its name, source trajectory and frame are appended to
    ``<self.inputpath>/e<epoch>_writeinputs.log``.

    Parameters
    ----------
    simsframes : iterable
        Frame selections exposing ``sim``, ``piece`` and ``frame`` attributes.
    epoch : int, optional
        Epoch to create. If None, ``self._getEpoch() + 1`` is used.

    Raises
    ------
    NameError
        If entries of the epoch or its log file already exist, or if a simulation has no input folder.
    """
    if epoch is None:
        epoch = self._getEpoch() + 1

    # NOTE(review): 'e<epoch>*' also matches the log file of the epoch and epochs sharing the same leading
    # digits (e.g. epoch 1 vs. 'e10s1_...')
    test = glob(path.join(self.inputpath, 'e' + str(epoch) + '*'))
    if len(test) != 0:
        raise NameError('Input dirs of epoch ' + str(epoch) + ' already exists.')

    logpath = path.join(self.inputpath, 'e' + str(epoch) + '_writeinputs.log')
    if path.exists(logpath):
        raise NameError('Epoch logfile already exists. Cant overwrite it.')

    # Raw string: '\d' inside a plain string literal is an invalid escape sequence
    regex = re.compile(r'(e\d+s\d+)_')

    # 'with' guarantees the log file is closed even if creating one of the simulations fails
    with open(logpath, 'w') as fid:
        for i, f in enumerate(simsframes):
            frameNum = f.frame
            piece = f.piece
            if f.sim.parent is None:
                currSim = f.sim
            else:
                currSim = f.sim.parent

            traj = currSim.trajectory[piece]
            if currSim.input is None:
                raise NameError('Could not find input folder in simulation lists. Cannot create new simulations.')

            wuName = _simName(traj)
            res = regex.search(wuName)
            if res:  # If we are running on top of adaptive, use the first name part for the next sim name
                wuName = res.group(1)

            # create new job directory
            newName = 'e' + str(epoch) + 's' + str(i + 1) + '_' + wuName + 'p' + str(piece) + 'f' + str(frameNum)
            newDir = path.join(self.inputpath, newName, '')

            # copy previous input directory including input files
            copytree(currSim.input, newDir, symlinks=False, ignore=ignore_patterns('*.dcd', '*.xtc', '*.coor'))

            # overwrite the coordinate file with the selected frame (frameNum is passed as is, no index shift)
            mol = Molecule()
            mol.read(traj)
            mol.frame = frameNum
            mol.write(path.join(newDir, 'input.coor'))

            # write nextInput file
            fid.write('# {0} \n{1} {2}\n'.format(newName, traj, frameNum))
def reconstructAdaptiveTraj(simlist, trajID):
    """ Reconstructs a long trajectory out of short adaptive runs.

    Parameters
    ----------
    simlist : numpy.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A simulation list generated by the :func:`simlist <htmd.simlist.simlist>` function
    trajID : int
        The id of the trajectory from which to start going back.

    Returns
    -------
    mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
        A Molecule object containing the reconstructed trajectory
    chain : list of tuple
        One ``(sim, piece, frame)`` tuple per simulation involved, ordered from the oldest ancestor to the
        simulation with ID `trajID`. `piece` and `frame` mark where the trajectory of that simulation is cut;
        the last entry holds ``-1`` for both, meaning its whole trajectory is used.
    pathlist : list of str
        The trajectory files of all simulations involved, in the same order as `chain`.

    Raises
    ------
    NameError
        If no simulation with ID `trajID` is found in `simlist`.

    Examples
    --------
    >>> mol, chain, pathlist = reconstructAdaptiveTraj(data.simlist, 52)
    """
    sim = next((s for s in simlist if s.simid == trajID), None)
    if sim is None:
        raise NameError('Could not find sim with ID {} in the simlist.'.format(trajID))

    pathlist = [sim.trajectory[0]]
    chain = [(sim, -1, -1)]  # (-1, -1): the starting simulation is used in full

    # Walk back through the parents until the one reported as epoch 1 is reached
    epo = None
    while epo != 1:
        sim, piece, frame, epo = _findprevioustraj(simlist, _simName(sim.trajectory[0]))
        pathlist.append(sim.trajectory[piece])
        chain.append((sim, piece, frame))
    pathlist = pathlist[::-1]
    chain = chain[::-1]

    mol = Molecule(sim.molfile)
    mol.coords = np.zeros((mol.numAtoms, 3, 0), dtype=np.float32)
    mol.fileloc = []
    mol.box = np.zeros((3, 0))
    for chainsim, endpiece, pieceframe in chain:
        tmpmol = Molecule(sim.molfile)
        tmpmol.read(chainsim.trajectory)
        # Test the frame sentinel itself. The previous check on the computed end frame only recognised it for
        # single-piece trajectories and cut the last piece off a multi-piece final simulation.
        if pieceframe != -1:
            fileloc = np.vstack(tmpmol.fileloc)
            filenames = fileloc[:, 0]
            # NOTE(review): np.unique sorts the file names; this assumes sorted order equals piece order
            pieces = np.unique(filenames)
            firstpieceframe = np.where(filenames == pieces[endpiece])[0][0]
            endFrame = firstpieceframe + pieceframe
            # Adding the actual respawned frame (+1) since the respawned sim doesn't include it in the xtc
            tmpmol.coords = tmpmol.coords[:, :, 0:endFrame + 1]
            tmpmol.fileloc = tmpmol.fileloc[0:endFrame + 1]
            tmpmol.box = tmpmol.box[:, 0:endFrame + 1]
        mol.coords = np.concatenate((mol.coords, tmpmol.coords), axis=2)
        mol.box = np.concatenate((mol.box, tmpmol.box), axis=1)
        mol.fileloc += tmpmol.fileloc
    return mol, chain, pathlist
def reconstructAdaptiveTraj(simlist, trajID):
    """ Reconstructs a long trajectory out of short adaptive runs.

    Parameters
    ----------
    simlist : numpy.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A simulation list generated by the :func:`simlist <htmd.simlist.simlist>` function
    trajID : int
        The id of the trajectory from which to start going back.

    Returns
    -------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A Molecule object containing the reconstructed trajectory
    chain : list of tuple
        ``(sim, piece, frame)`` tuples of all simulations involved, oldest ancestor first. The final tuple
        belongs to the simulation with ID `trajID` and carries ``-1`` as piece and frame, which stands for
        "use the complete trajectory".
    pathlist : list of str
        The trajectory file of every simulation involved, ordered like `chain`.

    Raises
    ------
    NameError
        If no simulation with ID `trajID` is found in `simlist`.

    Examples
    --------
    >>> mol, chain, pathlist = reconstructAdaptiveTraj(data.simlist, 52)
    """
    for s in simlist:
        if s.simid == trajID:
            sim = s
            break
    else:
        raise NameError('Could not find sim with ID {} in the simlist.'.format(trajID))

    pathlist = [sim.trajectory[0]]
    chain = [(sim, -1, -1)]  # sentinel entry: nothing is cut from the starting simulation

    # Follow the chain of parents back until the one reported as epoch 1
    epo = None
    while epo != 1:
        sim, piece, frame, epo = _findprevioustraj(simlist, _simName(sim.trajectory[0]))
        pathlist.append(sim.trajectory[piece])
        chain.append((sim, piece, frame))
    pathlist.reverse()
    chain.reverse()

    mol = Molecule(sim.molfile)
    mol.coords = np.zeros((mol.numAtoms, 3, 0), dtype=np.float32)
    mol.fileloc = []
    mol.box = np.zeros((3, 0))
    for c in chain:
        tmpmol = Molecule(sim.molfile)
        tmpmol.read(c[0].trajectory)
        endpiece, frame = c[1], c[2]
        # Only truncate entries which carry a real frame. Checking the computed end frame against -1 missed
        # the sentinel whenever the final simulation had more than one piece, dropping its last piece.
        if frame != -1:
            filenames = np.vstack(tmpmol.fileloc)[:, 0]
            # NOTE(review): np.unique returns sorted names; presumably this matches the order of the pieces
            pieces = np.unique(filenames)
            firstpieceframe = np.where(filenames == pieces[endpiece])[0][0]
            # +1 to include the actual respawned frame since the respawned sim doesn't include it in the xtc
            stop = firstpieceframe + frame + 1
            tmpmol.coords = tmpmol.coords[:, :, 0:stop]
            tmpmol.fileloc = tmpmol.fileloc[0:stop]
            tmpmol.box = tmpmol.box[:, 0:stop]
        mol.coords = np.concatenate((mol.coords, tmpmol.coords), axis=2)
        mol.box = np.concatenate((mol.box, tmpmol.box), axis=1)
        mol.fileloc += tmpmol.fileloc
    return mol, chain, pathlist