def parallelInit(self):
    '''
    parallelInit: Initialization for the parallelization
    '''
    import pytom_mpi

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    # need at least one worker besides the manager
    workerCount = pytom_mpi.size() - 1
    if workerCount < 1:
        raise RuntimeError(
            'Number of available cluster nodes is less than 2.')

    self.numWorkers = workerCount
def growingAverage(particleClassLists, score, angleObject, mask, destinationDirectory, preprocessing, verbose=False):
    """
    growingAverage: Run growing average either distributed (MPI manager /
    worker split by rank) or sequentially when only one process exists.
    """
    import pytom_mpi

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    if pytom_mpi.size() <= 1:
        # only one process available: do everything locally
        print('Processing in sequential mode')
        manager = GAManager(particleClassLists, score, angleObject, mask,
                            destinationDirectory, preprocessing)
        manager.sequentialGA(verbose)
    elif pytom_mpi.rank() == 0:
        # rank 0 acts as the manager and shuts the workers down afterwards
        manager = GAManager(particleClassLists, score, angleObject, mask,
                            destinationDirectory, preprocessing)
        manager.parallelGA(verbose)
        manager.parallelEnd()
    else:
        # every other rank processes jobs until told to stop
        GAWorker(pytom_mpi.rank()).run()

    pytom_mpi.finalise()
def __init__(self):
    """Record this process' MPI identity; require at least one worker."""
    # make sure MPI is up before querying rank/size
    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    self.mpi_id = pytom_mpi.rank()
    self.num_workers = pytom_mpi.size() - 1
    self.node_name = f'node_{self.mpi_id}'

    if self.num_workers < 1:
        raise RuntimeError("Not enough nodes to parallelize the job!")
def __init__(self):
    """Initialise MPI if necessary and record rank / worker count."""
    import pytom_mpi

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    self._mpi_id = pytom_mpi.rank()             # this process' rank
    self._numberWorkers = pytom_mpi.size() - 1  # all ranks except the manager
    self._jobList = []                          # jobs queued for distribution
def __init__(self, suffix=''):
    """Set up MPI identity, store the filename suffix and wipe leftovers."""
    import pytom_mpi

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    self.suffix = suffix
    self.mpi_id = pytom_mpi.rank()
    self.name = f'node_{self.mpi_id}'
    self.size = pytom_mpi.size()

    # start from a clean slate on disk
    self.clean()
def parallelEnd(self, verbose=True):
    """
    parallelEnd: End the parallel running of the program.
    """
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage

    if verbose:
        print(self.name + ': sending end messages to all')

    # NOTE(review): the loop starts at rank 0, so an "End" message is also
    # sent to this process itself — confirm the manager consumes it.
    for target in range(pytom_mpi.size()):
        end_msg = StatusMessage(str(self.mpi_id), str(target))
        end_msg.setStatus("End")
        pytom_mpi.send(str(end_msg), target)
def end(self, verbose=False):
    """Send an 'End' status message to every worker, then shut MPI down."""
    from pytom.parallel.messages import StatusMessage

    if verbose:
        print(self.node_name + ': sending end messages to others')

    my_rank = pytom_mpi.rank()
    for worker in range(1, pytom_mpi.size()):
        stop = StatusMessage(str(my_rank), str(worker))
        stop.setStatus("End")
        pytom_mpi.send(str(stop), worker)

    pytom_mpi.finalise()
def parallelEnd(self):
    """
    parallelEnd : Sends status message = end to all workers. All workers
    will terminate upon receiving this message.
    @author: Thomas Hrabe
    """
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    my_rank = pytom_mpi.rank()

    # workers live on ranks 1 .. size-1; tell each of them to stop
    for worker in range(1, pytom_mpi.size()):
        message = StatusMessage(str(my_rank), str(worker))
        message.setStatus("End")
        pytom_mpi.send(str(message), worker)
def parallelEnd(self, verbose=True):
    """
    parallelEnd : Sends status message = end to all workers.
    @param verbose: verbose mode
    @type verbose: boolean
    """
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage

    if verbose:
        print('Manager: sending end messages to workers')

    rank = pytom_mpi.rank()
    for node in range(1, pytom_mpi.size()):
        stop = StatusMessage(str(rank), str(node))
        stop.setStatus("End")
        pytom_mpi.send(str(stop), node)
def distributeAverage(particleList, averageName, showProgressBar=False, verbose=False,
                      createInfoVolumes=False, sendEndMessage=False):
    """
    distributeAverage : Distributes averaging to multiple nodes
    @param particleList: The particles
    @param averageName: Filename of new average
    @param verbose: Prints particle information. Disabled by default.
    @param createInfoVolumes: Create info data (wedge sum, inverted density) too? False by default.
    @return: A new Reference object
    @rtype: L{pytom.basic.structures.Reference}
    @author: Thomas Hrabe
    """
    import pytom_mpi

    # NOTE(review): if MPI was already initialised by the caller, this falls
    # through to sequential mode — confirm that is intended.
    mpiAvailable = False
    if not pytom_mpi.isInitialised():
        try:
            pytom_mpi.init()
            if pytom_mpi.size() > 1:
                mpiAvailable = True
        # BUGFIX: was a bare 'except:', which also swallowed SystemExit and
        # KeyboardInterrupt; catch Exception only.
        except Exception:
            print('Could not initialize MPI properly! Running in sequential mode!')

    if mpiAvailable:
        if pytom_mpi.rank() == 0:
            # manager: delegate to the MPI distribution routine
            return _disrtibuteAverageMPI(particleList, averageName, showProgressBar,
                                         verbose, createInfoVolumes, sendEndMessage)
        else:
            # worker: process expectation jobs until told to stop
            from pytom.alignment.ExMaxAlignment import ExMaxWorker
            worker = ExMaxWorker()
            worker.parallelRun(False)
    else:
        print('MPI not available')
        return average(particleList, averageName, showProgressBar, verbose, createInfoVolumes)
def _disrtibuteAverageMPI(particleList, averageName, showProgressBar=False, verbose=False,
                          createInfoVolumes=False, setParticleNodesRatio=3, sendEndMessage=False):
    """
    _distributeAverageMPI : Distributes averaging to multiple MPI nodes.
    @param particleList: The particles
    @param averageName: Filename of new average
    @param verbose: Prints particle information. Disabled by default.
    @param createInfoVolumes: Create info data (wedge sum, inverted density) too? False by default.
    @param setParticleNodesRatio: minimum number of particles per node
    @return: A new Reference object
    @rtype: L{pytom.basic.structures.Reference}
    @author: Thomas Hrabe
    """
    import pytom_mpi
    from pytom.alignment.structures import ExpectationJob
    from pytom.parallel.parallelWorker import ParallelWorker
    from pytom.parallel.alignmentMessages import ExpectationJobMsg
    from pytom_volume import read, complexDiv, complexRealMult
    from pytom.basic.fourier import fft, ifft
    from pytom.basic.filter import lowpassFilter
    from pytom.basic.structures import Reference
    import os

    numberOfNodes = pytom_mpi.size()
    particleNodesRatio = float(len(particleList)) / float(numberOfNodes)

    splitFactor = numberOfNodes
    if particleNodesRatio < setParticleNodesRatio:
        # make sure each node gets at least setParticleNodesRatio particles.
        # BUGFIX: use integer division — in Python 3 '/' returns a float and
        # splitNSublists needs an integer count; also never drop below 1.
        splitFactor = max(1, len(particleList) // setParticleNodesRatio)

    splitLists = particleList.splitNSublists(splitFactor)

    # one expectation job (and one set of result filenames) per sub-list
    msgList = []
    avgNameList = []
    preList = []
    wedgeList = []
    for i, plist in enumerate(splitLists):
        avgName = averageName + '_dist' + str(i) + '.em'
        avgNameList.append(avgName)
        preList.append(averageName + '_dist' + str(i) + '-PreWedge.em')
        wedgeList.append(averageName + '_dist' + str(i) + '-WedgeSumUnscaled.em')
        job = ExpectationJob(plist, avgName)
        message = ExpectationJobMsg(0, i)
        message.setJob(job)
        msgList.append(message)

    # distribute averaging
    worker = ParallelWorker()
    worker.fillJobList(msgList)
    worker.parallelWork(True, sendEndMessage)

    # collect results: sum the partial pre-wedge volumes and wedge sums
    result = read(preList[0])
    wedgeSum = read(wedgeList[0])
    for i in range(1, len(preList)):
        result += read(preList[i])
        wedgeSum += read(wedgeList[i])

    # strip the '.em' extension when deriving the info-volume names
    result.write(averageName[:-3] + '-PreWedge.em')
    wedgeSum.write(averageName[:-3] + '-WedgeSumUnscaled.em')

    invert_WedgeSum(invol=wedgeSum, r_max=result.sizeX() / 2 - 2.,
                    lowlimit=.05 * len(particleList), lowval=.05 * len(particleList))

    # apply the inverted wedge sum in Fourier space and renormalise
    result = ifft(complexRealMult(fft(result), wedgeSum))
    result.shiftscale(0.0, 1 / float(result.sizeX() * result.sizeY() * result.sizeZ()))

    # do a low pass filter
    result = lowpassFilter(result, result.sizeX() / 2 - 2, (result.sizeX() / 2 - 1) / 10.)[0]
    result.write(averageName)

    # clean up the per-node intermediate files.
    # BUGFIX: use os.remove instead of shelling out to 'rm' — no shell
    # involved, works with unusual filenames, raises on failure.
    for i in range(len(preList)):
        os.remove(avgNameList[i])
        os.remove(preList[i])
        os.remove(wedgeList[i])

    return Reference(averageName, particleList)
def parallelRun(self, job, splitX=0, splitY=0, splitZ=0, verbose=True, gpuID=-1):
    """
    parallelRun: Parallel run the job on the computer cluster.
    @param job: job
    @type job: L{pytom.localization.peak_job.PeakJob}
    @param splitX: split part along the x dimension
    @type splitX: integer
    @param splitY: split part along the y dimension
    @type splitY: integer
    @param splitZ: split part along the z dimension
    @type splitZ: integer
    @param verbose: print progress information
    @type verbose: boolean
    @param gpuID: GPU to use (forwarded to self.run)
    @type gpuID: integer
    """
    import pytom_mpi
    if self.mpi_id == 0:
        # send the first message: rank 0 seeds the job to itself, so the
        # message loop below handles it like any other incoming job
        # if not pytom_mpi.isInitialised():
        #     pytom_mpi.init()
        job.members = pytom_mpi.size()
        print('job members', job.members)
        job.send(0, 0)
        print("\n")
    self.gpuID = gpuID
    end = False
    # message loop: every rank keeps receiving until an 'End' status arrives
    while not end:
        # get the message string
        mpi_msgString = getMsgStr()
        msgType = self.getMsgType(mpi_msgString)
        if msgType == 2:  # Job msg
            msg = self.getJobMsg(mpi_msgString)
            job = self.jobFromMsg(msg)  # set members
            # rank 0 splits the volume along x/y/z; workers forward as-is
            if self.mpi_id == 0:
                self.distributeJobs(job, splitX, splitY, splitZ)
            else:
                self.distributeJobs(job)
            # process the local share and fold it into the running summary
            result = self.run(verbose, gpuID=gpuID)
            self.summarize(result, self.jobID)
        elif msgType == 1:  # Result msg
            msg = self.getResMsg(mpi_msgString)
            res = self.resFromMsg(msg)
            if verbose == True:
                print(self.name + ": processing result from worker " + msg.getSender())
            # merge the worker's score and orientation volumes
            resV = res.result.getVolume()
            resO = res.orient.getVolume()
            jobID = res.jobID
            self.summarize([resV, resO], jobID)
        elif msgType == 0:  # Status msg
            # get the message as StatusMessage and finish
            from pytom.parallel.messages import StatusMessage
            msg = StatusMessage('', '')
            msg.fromStr(mpi_msgString)
            if msg.getStatus() == 'End':
                end = True
                if verbose == True:
                    print(self.name + ': end')
        else:  # Error
            raise RuntimeError("False message type!")
    if self.mpi_id == 0:
        # delete the temporary per-job files on the disk, keeping the
        # accumulated 'sum' and 'sqr' volumes
        import os
        files = os.listdir(self.dstDir)
        for name in files:
            if 'job' in name and '.em' in name and not 'sum' in name and not 'sqr' in name:
                os.remove(self.dstDir + '/' + name)
        # NOTE(review): both branches assign the same empty string, so gpuID
        # currently has no effect on the output filenames — confirm whether a
        # GPU-specific suffix was intended here.
        if self.gpuID:
            gpuflag = ''
        else:
            gpuflag = ''
        # rename the result files name
        os.rename(self.dstDir + '/' + 'node_0_res.em',
                  self.dstDir + '/' + 'scores_{}{}.em'.format(self.suffix, gpuflag))
        os.rename(self.dstDir + '/' + 'node_0_orient.em',
                  self.dstDir + '/' + 'angles_{}{}.em'.format(self.suffix, gpuflag))
    self.clean()  # clean itself
    pytom_mpi.finalise()
def parallelReconstruction(particleList, projectionList, cubeSize, binning, applyWeighting, verbose=False):
    """
    parallelReconstruction: Distribute subtomogram reconstruction of a
    particle list over MPI workers; rank 0 manages, all other ranks run
    a ReconstructionWorker.
    @param particleList: particles to reconstruct
    @param projectionList: projections used for the reconstruction
    @param cubeSize: size of the reconstructed cubes
    @param binning: binning factor
    @param applyWeighting: apply weighting to the projections
    @param verbose: forwarded to the workers
    """
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    mpi_id = pytom_mpi.rank()

    if mpi_id == 0:
        numberWorkers = pytom_mpi.size() - 1

        # split particleList by number of workers.
        # BUGFIX: '/' returns a float in Python 3 and range() requires an
        # int; use ceiling division so every particle lands in a chunk and
        # at most numberWorkers chunks are produced (the old floor split
        # could create an extra chunk that was never sent).
        splitSize = -(-len(particleList) // numberWorkers)
        pl = []
        for i in range(0, len(particleList), splitSize):
            pl.append(particleList[i:i + splitSize])

        # hand one chunk to each worker (there may be fewer chunks than
        # workers when len(particleList) < numberWorkers)
        for i in range(len(pl)):
            msg = ReconstructionMessage(0, i + 1, pl[i], projectionList, cubeSize,
                                        binning, applyWeighting)
            pytom_mpi.send(str(msg), i + 1)

        # wait for one status message per dispatched job
        jobCount = len(pl)
        msgCounter = 0
        while msgCounter < jobCount:
            mpi_msgString = pytom_mpi.receive()
            msg = StatusMessage(1, '0')
            msg.fromStr(mpi_msgString)
            if not msg.getStatus() == 'Finished':
                print('Worker ' + str(msg.getSender()) + ' sent status: ' + str(msg.getStatus()))
            msgCounter += 1

        # tell every worker (including idle ones) to shut down
        for i in range(0, numberWorkers):
            msg = StatusMessage(mpi_id, '0')
            msg.setStatus('End')
            pytom_mpi.send(str(msg), i + 1)
    else:
        worker = ReconstructionWorker()
        worker.parallelRun(verbose)

    pytom_mpi.finalise()
def distributeExpectation(particleLists, iterationDirectory, averagePrefix, verbose=False, symmetry=None):
    """
    distributeExpectation: Distributes particle expectation (averaging) to multiple workers.
    Required by many algorithms such as MCOEXMX
    @param particleLists: list of particleLists
    @param iterationDirectory: directory of the current iteration
    @param averagePrefix: filename prefix for the class averages
    @param verbose: print each sent / received message
    @param symmetry: optional symmetry applied to each particle list before averaging
    @return: list of references, one per particle list
    """
    import pytom_mpi
    from pytom.tools.files import checkDirExists
    from pytom.parallel.alignmentMessages import ExpectationJobMsg, ExpectationResultMsg
    from pytom.alignment.structures import ExpectationJob
    from pytom.basic.structures import Reference, ReferenceList
    from os import mkdir

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    mpi_myid = pytom_mpi.rank()
    if not mpi_myid == 0:
        raise RuntimeError(
            'This function (distributeExpectation) can only be processed by mpi_id = 0! ID == '
            + str(mpi_myid) + ' Aborting!')

    if not checkDirExists(iterationDirectory):
        raise IOError('The iteration directory does not exist. ' + iterationDirectory)

    mpi_numberNodes = pytom_mpi.size()
    if mpi_numberNodes <= 1:
        raise RuntimeError('You must run clustering with openMPI on multiple CPUs')

    def _makeJob(index):
        # Build the ExpectationJob and Reference for particleLists[index],
        # creating the class directory if needed and applying symmetry.
        classDir = iterationDirectory + 'class' + str(index) + '/'
        if not checkDirExists(classDir):
            mkdir(classDir)
        averageName = classDir + averagePrefix + '-' + str(index) + '.em'
        plist = particleLists[index]
        # BUGFIX: guard against symmetry=None (the documented default);
        # previously this raised AttributeError. Symmetry is now also
        # applied in the refill path below, consistent with the first
        # distribution round.
        if symmetry is not None and not symmetry.isOneFold():
            plist = symmetry.apply(particleLists[index])
        job = ExpectationJob(plist, averageName)
        reference = Reference(averageName, particleLists[index])
        return job, reference

    listIterator = 0
    referenceList = ReferenceList()

    # first round: one job per node
    for i in range(1, mpi_numberNodes):
        if verbose:
            print('Starting first job distribute step')
        if listIterator < len(particleLists):
            job, newReference = _makeJob(listIterator)
            referenceList.append(newReference)
            jobMsg = ExpectationJobMsg(0, str(i))
            jobMsg.setJob(job)
            pytom_mpi.send(str(jobMsg), i)
            if verbose:
                print(jobMsg)
            listIterator = listIterator + 1

    # there are more jobs than nodes: keep distributing as results come in
    finished = False
    receivedMsgCounter = 0
    while not finished:
        # listen and collect
        mpi_msgString = pytom_mpi.receive()
        if verbose:
            print(mpi_msgString)
        jobResultMsg = ExpectationResultMsg('', '')
        jobResultMsg.fromStr(mpi_msgString)
        receivedMsgCounter = receivedMsgCounter + 1

        # send new job to the node that just became free
        if listIterator < len(particleLists):
            job, newReference = _makeJob(listIterator)
            referenceList.append(newReference)
            freeNode = int(jobResultMsg.getSender())
            jobMsg = ExpectationJobMsg(0, str(freeNode))
            jobMsg.setJob(job)
            # BUGFIX: send to the free node; previously this used 'i', the
            # stale loop index left over from the first distribution round,
            # so follow-up jobs went to the wrong (busy) node.
            pytom_mpi.send(str(jobMsg), freeNode)
            if verbose:
                print(jobMsg)
            listIterator = listIterator + 1

        finished = listIterator >= len(particleLists) and receivedMsgCounter == len(particleLists)

    return referenceList
def distributedCorrelationMatrix(job, verbose=False):
    """
    distributedCorrelationMatrix: Performs calculation of correlation matrix
    either on multiple processes or sequentially.
    @param job: the correlation matrix job
    @param verbose: print every received message
    """
    import pytom_mpi

    # BUGFIX/consistency: guard init like every other function in this
    # module — initialising MPI twice is an error.
    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    if pytom_mpi.size() > 1:
        mpi_myid = pytom_mpi.rank()
        if mpi_myid == 0:
            # manager: distribute vector jobs, collect results, save matrix
            manager = CMManager(job)
            manager.distributeCalculation(mpi_myid, verbose)
            manager.parallelEnd()
            manager.saveMatrix()
        else:
            from pytom.parallel.clusterMessages import CorrelationVectorMessage, CorrelationVectorJobMessage
            from pytom.parallel.messages import StatusMessage, MessageError

            end = False
            while not end:
                mpi_msg = pytom_mpi.receive()
                if verbose:
                    print(mpi_msg)
                try:
                    msg = CorrelationVectorJobMessage()
                    msg.fromStr(mpi_msg)
                    worker = CMWorker(msg.getJob())
                    resultVector = worker.run()
                    resultMessage = CorrelationVectorMessage(mpi_myid, 0)
                    resultMessage.setVector(resultVector)
                    pytom_mpi.send(str(resultMessage), 0)
                except (MessageError, RuntimeError, IndexError):
                    # not a job message: check whether it is the End signal
                    msg = StatusMessage('', '')
                    msg.fromStr(mpi_msg)
                    if msg.getStatus() == 'End':
                        end = True
            print('Node ' + str(mpi_myid) + ' finished')
    else:
        print('Sequential Processing! Running on one machine only!')
        manager = CMManager(job)
        manager.calculateMatrix()
        manager.saveMatrix()

    pytom_mpi.finalise()
def distributeCalculation(self, mpi_myid, verbose=False):
    """
    distributeCalculation: Distribute calculation of matrix to multiple nodes.
    @param mpi_myid: MPI rank of this process; must be 0 (the manager)
    @param verbose: print each job / result message
    """
    import pytom_mpi
    from pytom.cluster.correlationMatrixStructures import CorrelationVectorJob
    from pytom.parallel.clusterMessages import CorrelationVectorJobMessage, CorrelationVectorMessage
    from pytom.tools.ProgressBar import FixedProgBar

    # only the manager (rank 0) may distribute jobs
    if not mpi_myid == 0:
        raise Exception(
            'This function (distributeCalculation) can only be processed by mpi_id = 0! ID == '
            + mpi_myid.__str__() + ' Aborting!')

    mpi_myname = 'node_' + mpi_myid.__str__()  # NOTE(review): assigned but never used
    mpi_numberNodes = pytom_mpi.size()
    particleIndex = 0

    progressBar = FixedProgBar(0, len(self._particleList), 'Particles correlated ')
    progressBar.update(0)

    # distribute on all nodes: one initial job per worker. Each job
    # correlates particle[particleIndex] against every particle after it
    # (one row of the upper triangle of the matrix).
    for nodeIndex in range(1, mpi_numberNodes):
        if particleIndex < len(self._particleList):
            particle = self._particleList[particleIndex]
            reducedParticleList = self._particleList[particleIndex + 1:]
            job = CorrelationVectorJob(particle, reducedParticleList,
                                       self._mask, particleIndex,
                                       self._applyWedge, self._binningFactor,
                                       self._lowestFrequency, self._highestFrequency)
            jobMsg = CorrelationVectorJobMessage(str(mpi_myid), str(nodeIndex))
            jobMsg.setJob(job)
            if verbose:
                print(jobMsg)
            pytom_mpi.send(str(jobMsg), nodeIndex)
            particleIndex = particleIndex + 1

    numberVectorsReceived = 0
    # initially False (0 > len is never true for a non-empty list)
    finished = numberVectorsReceived > len(self._particleList)
    while not finished:
        # listen until numberVectorsReceived > len(self._particleList) and continue distributing
        mpi_msgString = pytom_mpi.receive()
        if verbose:
            print(mpi_msgString)
        correlationVectorMsg = CorrelationVectorMessage()
        correlationVectorMsg.fromStr(mpi_msgString)
        # round-trip check: parsing then re-serialising must be lossless
        assert correlationVectorMsg.__str__() == mpi_msgString
        # store the received row in the matrix and checkpoint to disk
        vector = correlationVectorMsg.getVector()
        self._setMatrixValuesFromVector(vector.getParticleIndex(), vector)
        self._savePreliminaryResult()
        #print 'Result received from ' + correlationVectorMsg.getSender().__str__() + ' and matrix saved to disk.'
        numberVectorsReceived = numberVectorsReceived + 1
        # hand the sender (now idle) the next remaining row, if any
        if particleIndex < len(self._particleList):
            #print 'Send particle number :' , particleIndex
            particle = self._particleList[particleIndex]
            reducedParticleList = self._particleList[particleIndex + 1:]
            job = CorrelationVectorJob(particle, reducedParticleList,
                                       self._mask, particleIndex,
                                       self._applyWedge, self._binningFactor,
                                       self._lowestFrequency, self._highestFrequency)
            jobMsg = CorrelationVectorJobMessage(
                mpi_myid.__str__(),
                correlationVectorMsg.getSender().__str__())
            jobMsg.setJob(job)
            pytom_mpi.send(jobMsg.__str__(), int(correlationVectorMsg.getSender()))
            particleIndex = particleIndex + 1
        #update progress bar
        progressBar.update(numberVectorsReceived)
        finished = numberVectorsReceived >= len(self._particleList)