def __init__(self, num_procs, controller_params, description):
    """
    Initialization routine for PFASST controller

    Builds ``num_procs`` step objects (the first one from ``description``, the
    others as copies of it when possible), optionally dumps the setup and
    validates that the resulting hierarchy can be handled by this controller.

    Args:
        num_procs: number of parallel time steps (still serial, though), can be 1
        controller_params: parameter set for the controller and the steps
        description: all the parameters to set up the rest (levels, problems, transfer, ...)

    Raises:
        ControllerError: if a multi-step, multi-level setup uses a collocation
            whose right end point is not a node, if the steps disagree on the
            number of levels or on the number of sweeps per level, if there is
            no level at all, or if more than one sweep is requested on the
            coarsest level of a multi-level setup
    """

    # call parent's initialization routine
    super(allinclusive_multigrid_nonMPI, self).__init__(controller_params)

    self.MS = [stepclass.step(description)]

    # try to initialize via dill.copy (much faster for many time-steps)
    try:
        for p in range(num_procs - 1):
            self.MS.append(dill.copy(self.MS[0]))
    # if this fails (e.g. due to un-picklable data in the steps), initialize separately
    # NOTE: the exception types must be given as a tuple; `A and B` would only catch B
    except (dill.PicklingError, TypeError):
        self.logger.warning('Need to initialize steps separately due to pickling error')
        # discard copies created before the failure so that exactly num_procs steps remain
        del self.MS[1:]
        for p in range(num_procs - 1):
            self.MS.append(stepclass.step(description))

    if self.params.dump_setup:
        self.dump_setup(step=self.MS[0], controller_params=controller_params, description=description)

    if num_procs > 1 and len(self.MS[0].levels) > 1:
        for S in self.MS:
            for L in S.levels:
                if not L.sweep.coll.right_is_node:
                    raise ControllerError("For PFASST to work, we assume uend^k = u_M^k")

    if all(len(S.levels) == len(self.MS[0].levels) for S in self.MS):
        self.nlevels = len(self.MS[0].levels)
    else:
        raise ControllerError('all steps need to have the same number of levels')

    if self.nlevels == 0:
        raise ControllerError('need at least one level')

    # collect the number of sweeps per level; entry nl must describe level nl
    self.nsweeps = []
    for nl in range(self.nlevels):
        if all(S.levels[nl].params.nsweeps == self.MS[0].levels[nl].params.nsweeps for S in self.MS):
            self.nsweeps.append(self.MS[0].levels[nl].params.nsweeps)
        else:
            # silently skipping this level would shift all following entries of self.nsweeps
            raise ControllerError('all steps need to have the same number of sweeps on level %s' % nl)

    if self.nlevels > 1 and self.nsweeps[-1] > 1:
        raise ControllerError('this controller cannot do multiple sweeps on coarsest level')
def default(local_MS_running):
    """
    Fallback handler used when a stage has no dedicated routine

    Args:
        local_MS_running (list): list of currently running steps

    Raises:
        ControllerError: always; the message names the stage of the first running step
    """
    unknown_stage = local_MS_running[0].status.stage
    raise ControllerError('Unknown stage, got %s' % unknown_stage)  # TODO
def __init__(self, num_procs, controller_params, description):
    """
    Initialization routine for PFASST controller

    Builds ``num_procs`` step objects (the first one from ``description``, the
    others as copies of it when possible), optionally dumps the setup and
    checks the restrictions of the classic controller.

    Args:
        num_procs: number of parallel time steps (still serial, though), can be 1
        controller_params: parameter set for the controller and the step class
        description: all the parameters to set up the rest (levels, problems, transfer, ...)

    Raises:
        AssertionError: if more than one step is combined with a single level (MSSDC)
        ControllerError: if a multi-step, multi-level setup uses a collocation whose
            right end point is not a node or which requests a collocation update,
            or if any level asks for more than one sweep
    """

    # call parent's initialization routine
    super(allinclusive_classic_nonMPI, self).__init__(controller_params)

    self.logger.warning('classic controller is about to become deprecated, use multigrid controller instead')

    self.MS = [stepclass.step(description)]

    # try to initialize via dill.copy (much faster for many time-steps)
    try:
        for p in range(num_procs - 1):
            self.MS.append(dill.copy(self.MS[0]))
    # if this fails (e.g. due to un-picklable data in the steps), initialize separately
    # NOTE: the exception types must be given as a tuple; `A and B` would only catch B
    except (dill.PicklingError, TypeError):
        self.logger.warning('Need to initialize steps separately due to pickling error')
        # discard copies created before the failure so that exactly num_procs steps remain
        del self.MS[1:]
        for p in range(num_procs - 1):
            self.MS.append(stepclass.step(description))

    # kept as an assert so that callers relying on AssertionError keep working
    assert not (len(self.MS) > 1 and len(self.MS[0].levels) == 1), "ERROR: classic cannot do MSSDC"

    if self.params.dump_setup:
        self.dump_setup(step=self.MS[0], controller_params=controller_params, description=description)

    num_levels = len(self.MS[0].levels)

    if num_procs > 1 and num_levels > 1:
        for S in self.MS:
            for L in S.levels:
                if not L.sweep.coll.right_is_node or L.sweep.params.do_coll_update:
                    raise ControllerError("For PFASST to work, we assume uend^k = u_M^k in this controller")

    # only the first step is inspected here, exactly one sweep per level is allowed
    for L in self.MS[0].levels:
        if L.params.nsweeps > 1:
            raise ControllerError('classic controller cannot do multiple sweeps')
def __init__(self, controller_params, description, comm):
    """
    Set up the MPI-based multigrid PFASST controller with one step per rank

    Args:
        controller_params: parameter set for the controller and the step class
        description: all the parameters to set up the rest (levels, problems, transfer, ...)
        comm: MPI communicator

    Raises:
        ControllerError: if more than one rank is combined with a single level, or
            if a multi-rank, multi-level setup uses a collocation whose right end
            point is not a node or which requests a collocation update
    """

    # parent class takes care of the controller parameters
    super(allinclusive_multigrid_MPI, self).__init__(controller_params)

    # every rank owns exactly one step
    self.S = step(description)

    # communicator is stored for later use
    self.comm = comm

    # container for the request handles of isend
    self.req_send = []

    # request handle of the status send
    self.req_status = None

    num_procs = self.comm.Get_size()
    rank = self.comm.Get_rank()

    # only the first rank dumps the setup
    if self.params.dump_setup and rank == 0:
        self.dump_setup(step=self.S, controller_params=controller_params, description=description)

    num_levels = len(self.S.levels)

    if num_procs > 1:
        if num_levels == 1:
            raise ControllerError("multigrid cannot do MSSDC, sorry!")

        if num_levels > 1:
            # PFASST needs the end value to coincide with the last node on every level
            incompatible = any(
                (not lvl.sweep.coll.right_is_node) or lvl.sweep.params.do_coll_update
                for lvl in self.S.levels
            )
            if incompatible:
                raise ControllerError("For PFASST to work, we assume uend^k = u_M^k")
def __init__(self, controller_params, description, comm):
    """
    Set up the MPI-based PFASST controller with one step per rank

    Args:
        controller_params: parameter set for the controller and the step class
        description: all the parameters to set up the rest (levels, problems, transfer, ...)
        comm: MPI communicator

    Raises:
        ControllerError: if a multi-rank, multi-level setup uses a collocation whose
            right end point is not a node or which requests a collocation update
    """

    # parent class takes care of the controller parameters
    super(controller_MPI, self).__init__(controller_params)

    # every rank owns exactly one step
    self.S = step(description)

    # communicator is stored for later use
    self.comm = comm

    num_procs = self.comm.Get_size()
    rank = self.comm.Get_rank()

    # make the size of the time communicator available to the step
    self.S.status.time_size = num_procs

    # only the first rank dumps the setup
    if self.params.dump_setup and rank == 0:
        self.dump_setup(step=self.S, controller_params=controller_params, description=description)

    num_levels = len(self.S.levels)

    # request handles: status send, one isend slot per level, ibcast and diff
    self.req_status = None
    self.req_send = [None] * num_levels
    self.req_ibcast = None
    self.req_diff = None

    if num_procs > 1 and num_levels > 1:
        # PFASST needs the end value to coincide with the last node on every level
        incompatible = any(
            (not lvl.sweep.coll.right_is_node) or lvl.sweep.params.do_coll_update
            for lvl in self.S.levels
        )
        if incompatible:
            raise ControllerError("For PFASST to work, we assume uend^k = u_M^k")

    predictor_requested = self.params.predict_type is not None
    if num_levels == 1 and predictor_requested:
        self.logger.warning(
            'you have specified a predictor type but only a single level.. '
            'predictor will be ignored'
        )
def __init__(self, num_procs, controller_params, description):
    """
    Set up the classic (non-MPI) PFASST controller

    Args:
        num_procs: number of parallel time steps (still serial, though), can be 1
        controller_params: parameter set for the controller and the step class
        description: all the parameters to set up the rest (levels, problems, transfer, ...)

    Raises:
        AssertionError: if more than one step is combined with a single level (MSSDC)
        ControllerError: if a multi-step, multi-level setup uses a collocation whose
            right end point is not a node or which requests a collocation update,
            or if any level of the first step asks for more than one sweep
    """

    # parent class takes care of the controller parameters
    super(allinclusive_classic_nonMPI, self).__init__(controller_params)

    self.logger.warning('classic controller is about to become deprecated, use multigrid controller instead')

    # one freshly generated step (including its hierarchy) per time step
    self.MS = [stepclass.step(description) for _ in range(num_procs)]

    assert len(self.MS) <= 1 or len(self.MS[0].levels) != 1, "ERROR: classic cannot do MSSDC"

    if self.params.dump_setup:
        self.dump_setup(step=self.MS[0], controller_params=controller_params, description=description)

    first_step = self.MS[0]
    num_levels = len(first_step.levels)

    if num_procs > 1 and num_levels > 1:
        # PFASST needs the end value to coincide with the last node on every level of every step
        incompatible = any(
            (not lvl.sweep.coll.right_is_node) or lvl.sweep.params.do_coll_update
            for current_step in self.MS
            for lvl in current_step.levels
        )
        if incompatible:
            raise ControllerError("For PFASST to work, we assume uend^k = u_M^k in this controller")

    # the classic controller supports exactly one sweep per level
    if any(lvl.params.nsweeps > 1 for lvl in first_step.levels):
        raise ControllerError('classic controller cannot do multiple sweeps')
def pfasst(self, comm, num_procs):
    """
    Main function including the stages of SDC, MLSDC and PFASST (the "controller")

    Each call executes exactly one stage for the step owned by this process. The
    stage is read from ``self.S.status.stage`` and, before returning, replaced by
    the stage to be executed on the next call.

    For the workflow of this controller, check out one of our PFASST talks

    Args:
        comm: communicator
        num_procs: number of active processors

    Raises:
        ControllerError: if the current stage is not known, or if no follow-up
            stage can be determined after SPREAD or IT_CHECK
    """

    stage = self.S.status.stage

    self.logger.debug(stage)

    if stage == 'SPREAD':
        # (potentially) serial spreading phase

        # first stage: spread values
        self.hooks.pre_step(step=self.S, level_number=0)

        # call predictor from sweeper
        self.S.levels[0].sweep.predict()

        # update stage (pre_iteration is called here only if the PREDICT stage is skipped)
        if len(self.S.levels) > 1 and self.params.predict:  # MLSDC or PFASST with predict
            self.S.status.stage = 'PREDICT'
        elif len(self.S.levels) > 1:  # MLSDC or PFASST without predict
            self.hooks.pre_iteration(step=self.S, level_number=0)
            self.S.status.stage = 'IT_FINE'
        elif num_procs > 1 and len(self.S.levels) == 1:  # MSSDC
            self.hooks.pre_iteration(step=self.S, level_number=0)
            self.S.status.stage = 'IT_COARSE'
        elif num_procs == 1:  # SDC
            self.hooks.pre_iteration(step=self.S, level_number=0)
            self.S.status.stage = 'IT_FINE'
        else:
            raise ControllerError("Don't know what to do after spread, aborting")

    elif stage == 'PREDICT':
        # call predictor (serial)
        self.predictor(comm)

        # update stage
        self.hooks.pre_iteration(step=self.S, level_number=0)
        self.S.status.stage = 'IT_FINE'

    elif stage == 'IT_FINE':
        # do fine sweep

        # standard sweep workflow: update nodes, compute residual, log progress
        self.hooks.pre_sweep(step=self.S, level_number=0)
        for k in range(self.S.levels[0].params.nsweeps):
            self.S.levels[0].sweep.update_nodes()
        self.S.levels[0].sweep.compute_residual()
        self.hooks.post_sweep(step=self.S, level_number=0)

        # update stage
        self.S.status.stage = 'IT_CHECK'

    elif stage == 'IT_CHECK':
        # check whether to stop iterating (parallel)
        self.hooks.post_iteration(step=self.S, level_number=0)

        self.S.status.done = self.check_convergence(self.S)
        # collect the done-flags of all processes in comm
        all_done = comm.allgather(self.S.status.done)

        # if not everyone is ready yet, keep doing stuff
        if not all(all_done):
            # a locally converged step keeps iterating until all steps are done
            self.S.status.done = False
            # increment iteration count here (and only here)
            self.S.status.iter += 1
            self.hooks.pre_iteration(step=self.S, level_number=0)
            # multi-level or single-level?
            if len(self.S.levels) > 1:  # MLSDC or PFASST
                self.S.status.stage = 'IT_UP'
            elif num_procs > 1:  # MSSDC
                self.S.status.stage = 'IT_COARSE_RECV'
            elif num_procs == 1:  # SDC
                self.S.status.stage = 'IT_FINE'
            else:
                raise ControllerError("Weird stage in IT_CHECK")
        else:
            self.S.levels[0].sweep.compute_end_point()
            self.hooks.post_step(step=self.S, level_number=0)
            self.S.status.stage = 'DONE'

    elif stage == 'IT_UP':
        # go up the hierarchy from finest to coarsest level (parallel)
        self.S.transfer(source=self.S.levels[0], target=self.S.levels[1])

        # sweep and send on middle levels (not on finest, not on coarsest, though)
        for l in range(1, len(self.S.levels) - 1):
            self.hooks.pre_sweep(step=self.S, level_number=l)
            for k in range(self.S.levels[l].params.nsweeps):
                self.S.levels[l].sweep.update_nodes()
            self.S.levels[l].sweep.compute_residual()
            self.hooks.post_sweep(step=self.S, level_number=l)

            # transfer further up the hierarchy
            self.S.transfer(source=self.S.levels[l], target=self.S.levels[l + 1])

        # update stage
        self.S.status.stage = 'IT_COARSE_RECV'

    elif stage == 'IT_COARSE_RECV':
        # receive from previous step (if not first)
        if not self.S.status.first:
            # NOTE: the value logged in the "tag" slot is the coarsest level index,
            # the tag actually passed to recv below is the iteration counter
            self.logger.debug('recv data: process %s, stage %s, time %s, source %s, tag %s, iter %s' %
                              (self.S.status.slot, self.S.status.stage, self.S.time, self.S.prev,
                               len(self.S.levels) - 1, self.S.status.iter))
            self.recv(target=self.S.levels[-1], source=self.S.prev, tag=self.S.status.iter, comm=comm)

        # update stage
        self.S.status.stage = 'IT_COARSE'

    elif stage == 'IT_COARSE':
        # sweeps on coarsest level (serial/blocking)

        # do the sweep
        self.hooks.pre_sweep(step=self.S, level_number=len(self.S.levels) - 1)
        for k in range(self.S.levels[-1].params.nsweeps):
            self.S.levels[-1].sweep.update_nodes()
        self.S.levels[-1].sweep.compute_residual()
        self.hooks.post_sweep(step=self.S, level_number=len(self.S.levels) - 1)
        self.S.levels[-1].sweep.compute_end_point()

        # send to next step
        if not self.S.status.last:
            # NOTE: the value logged in the "tag" slot is the coarsest level index,
            # the tag actually passed to send below is the iteration counter
            self.logger.debug('isend data: process %s, stage %s, time %s, target %s, tag %s, iter %s' %
                              (self.S.status.slot, self.S.status.stage, self.S.time, self.S.next,
                               len(self.S.levels) - 1, self.S.status.iter))
            self.send(source=self.S.levels[-1], target=self.S.next, tag=self.S.status.iter, comm=comm)

        # update stage
        if len(self.S.levels) > 1:  # MLSDC or PFASST
            self.S.status.stage = 'IT_DOWN'
        else:  # MSSDC
            self.S.status.stage = 'IT_CHECK'

    elif stage == 'IT_DOWN':
        # prolong corrections down to finest level (parallel)

        # receive and sweep on middle levels (except for coarsest level)
        for l in range(len(self.S.levels) - 1, 0, -1):

            # prolong values
            self.S.transfer(source=self.S.levels[l], target=self.S.levels[l - 1])

            self.S.levels[l - 1].sweep.compute_end_point()

            # the non-blocking send is posted before the receive and waited for afterwards
            req_send = None
            if not self.S.status.last and self.params.fine_comm:
                self.logger.debug('send data: process %s, stage %s, time %s, target %s, tag %s, iter %s' %
                                  (self.S.status.slot, self.S.status.stage, self.S.time, self.S.next,
                                   l - 1, self.S.status.iter))
                req_send = comm.isend(self.S.levels[l - 1].uend, dest=self.S.next, tag=self.S.status.iter)

            if not self.S.status.first and self.params.fine_comm:
                self.logger.debug('recv data: process %s, stage %s, time %s, source %s, tag %s, iter %s' %
                                  (self.S.status.slot, self.S.status.stage, self.S.time, self.S.prev,
                                   l - 1, self.S.status.iter))
                self.recv(target=self.S.levels[l - 1], source=self.S.prev, tag=self.S.status.iter, comm=comm)

            # same condition as for the isend above, so req_send is not None here
            if not self.S.status.last and self.params.fine_comm:
                req_send.wait()

            # on middle levels: do sweep as usual
            if l - 1 > 0:
                self.hooks.pre_sweep(step=self.S, level_number=l - 1)
                for k in range(self.S.levels[l - 1].params.nsweeps):
                    self.S.levels[l - 1].sweep.update_nodes()
                self.S.levels[l - 1].sweep.compute_residual()
                self.hooks.post_sweep(step=self.S, level_number=l - 1)

        # update stage
        self.S.status.stage = 'IT_FINE'

    else:
        raise ControllerError('Weird stage, got %s' % self.S.status.stage)
def predict(local_MS_running):
    """
    Predictor phase

    Runs the predictor selected by ``self.params.predict_type`` on all running
    steps and afterwards sets the stage of every step to 'IT_CHECK'. ``self`` is
    not a parameter of this routine, it is taken from the enclosing scope.

    Args:
        local_MS_running (list): list of currently running steps

    Raises:
        NotImplementedError: if the predictor type is 'fmg'
        ControllerError: if the predictor type is not known
    """

    for S in local_MS_running:
        self.hooks.pre_predict(step=S, level_number=0)

    if self.params.predict_type is None:
        # no predictor requested, only the hooks and the stage update are executed
        pass

    elif self.params.predict_type == 'fine_only':

        # do a fine sweep only
        for S in local_MS_running:
            S.levels[0].sweep.update_nodes()

    elif self.params.predict_type == 'libpfasst_style':

        # loop over all steps
        for S in local_MS_running:

            # restrict to coarsest level
            for l in range(1, len(S.levels)):
                S.transfer(source=S.levels[l - 1], target=S.levels[l])

        # run in serial on coarse level
        for S in local_MS_running:

            self.hooks.pre_comm(step=S, level_number=len(S.levels) - 1)
            # receive from previous step (if not first)
            if not S.status.first:
                self.logger.debug('Process %2i receives from %2i on level %2i with tag %s -- PREDICT' %
                                  (S.status.slot, S.prev.status.slot, len(S.levels) - 1, 0))
                self.recv(S.levels[-1], S.prev.levels[-1], tag=(len(S.levels), 0, S.prev.status.slot))
            self.hooks.post_comm(step=S, level_number=len(S.levels) - 1)

            # do the coarse sweep
            S.levels[-1].sweep.update_nodes()

            self.hooks.pre_comm(step=S, level_number=len(S.levels) - 1)
            # send to succ step
            if not S.status.last:
                self.logger.debug('Process %2i provides data on level %2i with tag %s -- PREDICT' %
                                  (S.status.slot, len(S.levels) - 1, 0))
                self.send(S.levels[-1], tag=(len(S.levels), 0, S.status.slot))
            self.hooks.post_comm(step=S, level_number=len(S.levels) - 1, add_to_stats=True)

        # go back to fine level, sweeping
        for l in range(self.nlevels - 1, 0, -1):

            for S in local_MS_running:
                # prolong values
                S.transfer(source=S.levels[l], target=S.levels[l - 1])

                # sweep on middle levels only, the finest level is handled below
                if l - 1 > 0:
                    S.levels[l - 1].sweep.update_nodes()

        # end with a fine sweep
        for S in local_MS_running:
            S.levels[0].sweep.update_nodes()

    elif self.params.predict_type == 'pfasst_burnin':

        # loop over all steps
        for S in local_MS_running:

            # restrict to coarsest level
            for l in range(1, len(S.levels)):
                S.transfer(source=S.levels[l - 1], target=S.levels[l])

        # loop over all steps
        for q in range(len(local_MS_running)):

            # loop over last steps: [1,2,3,4], [2,3,4], [3,4], [4]
            for p in range(q, len(local_MS_running)):
                S = local_MS_running[p]

                # do the sweep with new values
                S.levels[-1].sweep.update_nodes()

                self.hooks.pre_comm(step=S, level_number=len(S.levels) - 1)
                # send updated values on coarsest level
                self.logger.debug('Process %2i provides data on level %2i with tag %s -- PREDICT' %
                                  (S.status.slot, len(S.levels) - 1, 0))
                self.send(S.levels[-1], tag=(len(S.levels), 0, S.status.slot))
                self.hooks.post_comm(step=S, level_number=len(S.levels) - 1)

            # loop over last steps: [2,3,4], [3,4], [4]
            for p in range(q + 1, len(local_MS_running)):
                S = local_MS_running[p]
                # receive values sent during previous sweep
                self.hooks.pre_comm(step=S, level_number=len(S.levels) - 1)
                self.logger.debug('Process %2i receives from %2i on level %2i with tag %s -- PREDICT' %
                                  (S.status.slot, S.prev.status.slot, len(S.levels) - 1, 0))
                self.recv(S.levels[-1], S.prev.levels[-1], tag=(len(S.levels), 0, S.prev.status.slot))
                # add_to_stats is only set for the last running step
                self.hooks.post_comm(step=S, level_number=len(S.levels) - 1,
                                     add_to_stats=(p == len(local_MS_running) - 1))

        # loop over all steps
        for S in local_MS_running:

            # interpolate back to finest level
            for l in range(len(S.levels) - 1, 0, -1):
                S.transfer(source=S.levels[l], target=S.levels[l - 1])

        # end this with a fine sweep
        for S in local_MS_running:
            S.levels[0].sweep.update_nodes()

    elif self.params.predict_type == 'fmg':
        # TODO: implement FMG predictor
        raise NotImplementedError('FMG predictor is not yet implemented')

    else:
        raise ControllerError('Wrong predictor type, got %s' % self.params.predict_type)

    for S in local_MS_running:
        self.hooks.post_predict(step=S, level_number=0)

    for S in local_MS_running:
        # update stage
        S.status.stage = 'IT_CHECK'
def pfasst(self, local_MS_active):
    """
    Main function including the stages of SDC, MLSDC and PFASST (the "controller")

    For the workflow of this controller, check out one of our PFASST talks or the pySDC paper

    Each call executes exactly one stage for all steps which are not 'DONE' yet.
    This method changes self.MS directly by accessing active steps through local_MS_active.

    Args:
        local_MS_active (list): all active steps

    Returns:
        bool: True if all active steps are done, False otherwise

    Raises:
        ControllerError: if the running steps are not all in the same stage or if
            the stage is not known
    """

    def spread(local_MS_running):
        """
        Spreading phase

        Args:
            local_MS_running (list): list of currently running steps
        """

        for S in local_MS_running:

            # first stage: spread values
            self.hooks.pre_step(step=S, level_number=0)

            # call predictor from sweeper
            S.levels[0].sweep.predict()

            if self.params.use_iteration_estimator:
                # store previous iterate to compute difference later on
                S.levels[0].uold[:] = S.levels[0].u[:]

            # update stage
            if len(S.levels) > 1:  # MLSDC or PFASST with predict
                S.status.stage = 'PREDICT'
            else:
                S.status.stage = 'IT_CHECK'

    def predict(local_MS_running):
        """
        Predictor phase

        Args:
            local_MS_running (list): list of currently running steps
        """

        for S in local_MS_running:
            self.hooks.pre_predict(step=S, level_number=0)

        if self.params.predict_type is None:
            pass

        elif self.params.predict_type == 'fine_only':

            # do a fine sweep only
            for S in local_MS_running:
                S.levels[0].sweep.update_nodes()

        elif self.params.predict_type == 'libpfasst_style':

            # loop over all steps
            for S in local_MS_running:

                # restrict to coarsest level
                for l in range(1, len(S.levels)):
                    S.transfer(source=S.levels[l - 1], target=S.levels[l])

            # run in serial on coarse level
            for S in local_MS_running:

                self.hooks.pre_comm(step=S, level_number=len(S.levels) - 1)
                # receive from previous step (if not first)
                if not S.status.first:
                    self.logger.debug('Process %2i receives from %2i on level %2i with tag %s -- PREDICT' %
                                      (S.status.slot, S.prev.status.slot, len(S.levels) - 1, 0))
                    self.recv(S.levels[-1], S.prev.levels[-1], tag=(len(S.levels), 0, S.prev.status.slot))
                self.hooks.post_comm(step=S, level_number=len(S.levels) - 1)

                # do the coarse sweep
                S.levels[-1].sweep.update_nodes()

                self.hooks.pre_comm(step=S, level_number=len(S.levels) - 1)
                # send to succ step
                if not S.status.last:
                    self.logger.debug('Process %2i provides data on level %2i with tag %s -- PREDICT' %
                                      (S.status.slot, len(S.levels) - 1, 0))
                    self.send(S.levels[-1], tag=(len(S.levels), 0, S.status.slot))
                self.hooks.post_comm(step=S, level_number=len(S.levels) - 1, add_to_stats=True)

            # go back to fine level, sweeping
            for l in range(self.nlevels - 1, 0, -1):

                for S in local_MS_running:
                    # prolong values
                    S.transfer(source=S.levels[l], target=S.levels[l - 1])

                    if l - 1 > 0:
                        S.levels[l - 1].sweep.update_nodes()

            # end with a fine sweep
            for S in local_MS_running:
                S.levels[0].sweep.update_nodes()

        elif self.params.predict_type == 'pfasst_burnin':

            # loop over all steps
            for S in local_MS_running:

                # restrict to coarsest level
                for l in range(1, len(S.levels)):
                    S.transfer(source=S.levels[l - 1], target=S.levels[l])

            # loop over all steps
            for q in range(len(local_MS_running)):

                # loop over last steps: [1,2,3,4], [2,3,4], [3,4], [4]
                for p in range(q, len(local_MS_running)):
                    S = local_MS_running[p]

                    # do the sweep with new values
                    S.levels[-1].sweep.update_nodes()

                    self.hooks.pre_comm(step=S, level_number=len(S.levels) - 1)
                    # send updated values on coarsest level
                    self.logger.debug('Process %2i provides data on level %2i with tag %s -- PREDICT' %
                                      (S.status.slot, len(S.levels) - 1, 0))
                    self.send(S.levels[-1], tag=(len(S.levels), 0, S.status.slot))
                    self.hooks.post_comm(step=S, level_number=len(S.levels) - 1)

                # loop over last steps: [2,3,4], [3,4], [4]
                for p in range(q + 1, len(local_MS_running)):
                    S = local_MS_running[p]
                    # receive values sent during previous sweep
                    self.hooks.pre_comm(step=S, level_number=len(S.levels) - 1)
                    self.logger.debug('Process %2i receives from %2i on level %2i with tag %s -- PREDICT' %
                                      (S.status.slot, S.prev.status.slot, len(S.levels) - 1, 0))
                    self.recv(S.levels[-1], S.prev.levels[-1], tag=(len(S.levels), 0, S.prev.status.slot))
                    self.hooks.post_comm(step=S, level_number=len(S.levels) - 1,
                                         add_to_stats=(p == len(local_MS_running) - 1))

            # loop over all steps
            for S in local_MS_running:

                # interpolate back to finest level
                for l in range(len(S.levels) - 1, 0, -1):
                    S.transfer(source=S.levels[l], target=S.levels[l - 1])

            # end this with a fine sweep
            for S in local_MS_running:
                S.levels[0].sweep.update_nodes()

        elif self.params.predict_type == 'fmg':
            # TODO: implement FMG predictor
            raise NotImplementedError('FMG predictor is not yet implemented')

        else:
            raise ControllerError('Wrong predictor type, got %s' % self.params.predict_type)

        for S in local_MS_running:
            self.hooks.post_predict(step=S, level_number=0)

        for S in local_MS_running:
            # update stage
            S.status.stage = 'IT_CHECK'

    def it_check(local_MS_running):
        """
        Key routine to check for convergence/termination

        Args:
            local_MS_running (list): list of currently running steps
        """

        for S in local_MS_running:

            # send updated values forward
            self.hooks.pre_comm(step=S, level_number=0)
            if not S.status.last:
                self.logger.debug('Process %2i provides data on level %2i with tag %s' %
                                  (S.status.slot, 0, S.status.iter))
                self.send(S.levels[0], tag=(0, S.status.iter, S.status.slot))

            # receive values
            if not S.status.prev_done and not S.status.first:
                self.logger.debug('Process %2i receives from %2i on level %2i with tag %s' %
                                  (S.status.slot, S.prev.status.slot, 0, S.status.iter))
                self.recv(S.levels[0], S.prev.levels[0], tag=(0, S.status.iter, S.prev.status.slot))
            self.hooks.post_comm(step=S, level_number=0)

            S.levels[0].sweep.compute_residual()

        if self.params.use_iteration_estimator:
            self.check_iteration_estimator(local_MS_running)

        for S in local_MS_running:

            S.status.done = self.check_convergence(S)

            if S.status.iter > 0:
                self.hooks.post_iteration(step=S, level_number=0)

        for S in local_MS_running:
            if not S.status.first:
                self.hooks.pre_comm(step=S, level_number=0)
                S.status.prev_done = S.prev.status.done  # "communicate"
                self.hooks.post_comm(step=S, level_number=0, add_to_stats=True)
                # a step can only be done if its predecessor is done, too
                S.status.done = S.status.done and S.status.prev_done

            if self.params.all_to_done:
                self.hooks.pre_comm(step=S, level_number=0)
                S.status.done = all([T.status.done for T in local_MS_running])
                self.hooks.post_comm(step=S, level_number=0, add_to_stats=True)

            if not S.status.done:
                # increment iteration count here (and only here)
                S.status.iter += 1
                self.hooks.pre_iteration(step=S, level_number=0)
                if self.params.use_iteration_estimator:
                    # store previous iterate to compute difference later on
                    S.levels[0].uold[:] = S.levels[0].u[:]

                if len(S.levels) > 1:  # MLSDC or PFASST
                    S.status.stage = 'IT_DOWN'
                else:  # SDC or MSSDC
                    if len(local_MS_running) == 1 or self.params.mssdc_jac:  # SDC or parallel MSSDC (Jacobi-like)
                        S.status.stage = 'IT_FINE'
                    else:
                        S.status.stage = 'IT_COARSE'  # serial MSSDC (Gauss-like)
            else:
                S.levels[0].sweep.compute_end_point()
                self.hooks.post_step(step=S, level_number=0)
                S.status.stage = 'DONE'

    def it_fine(local_MS_running):
        """
        Fine sweeps

        Args:
            local_MS_running (list): list of currently running steps
        """

        for S in local_MS_running:
            S.levels[0].status.sweep = 0

        for k in range(self.nsweeps[0]):

            for S in local_MS_running:
                S.levels[0].status.sweep += 1

            for S in local_MS_running:
                # send updated values forward
                self.hooks.pre_comm(step=S, level_number=0)
                if not S.status.last:
                    self.logger.debug('Process %2i provides data on level %2i with tag %s' %
                                      (S.status.slot, 0, S.status.iter))
                    self.send(S.levels[0], tag=(0, S.status.iter, S.status.slot))

                # receive values
                if not S.status.prev_done and not S.status.first:
                    self.logger.debug('Process %2i receives from %2i on level %2i with tag %s' %
                                      (S.status.slot, S.prev.status.slot, 0, S.status.iter))
                    self.recv(S.levels[0], S.prev.levels[0], tag=(0, S.status.iter, S.prev.status.slot))
                self.hooks.post_comm(step=S, level_number=0, add_to_stats=(k == self.nsweeps[0] - 1))

            for S in local_MS_running:
                # standard sweep workflow: update nodes, compute residual, log progress
                self.hooks.pre_sweep(step=S, level_number=0)
                S.levels[0].sweep.update_nodes()
                S.levels[0].sweep.compute_residual()
                self.hooks.post_sweep(step=S, level_number=0)

        for S in local_MS_running:
            # update stage
            S.status.stage = 'IT_CHECK'

    def it_down(local_MS_running):
        """
        Go down the hierarchy from finest to coarsest level

        Args:
            local_MS_running (list): list of currently running steps
        """

        for S in local_MS_running:
            S.transfer(source=S.levels[0], target=S.levels[1])

        for l in range(1, self.nlevels - 1):

            # sweep on middle levels (not on finest, not on coarsest, though)

            for k in range(self.nsweeps[l]):

                for S in local_MS_running:

                    # send updated values forward
                    self.hooks.pre_comm(step=S, level_number=l)
                    if not S.status.last:
                        self.logger.debug('Process %2i provides data on level %2i with tag %s' %
                                          (S.status.slot, l, S.status.iter))
                        self.send(S.levels[l], tag=(l, S.status.iter, S.status.slot))

                    # receive values
                    if not S.status.prev_done and not S.status.first:
                        self.logger.debug('Process %2i receives from %2i on level %2i with tag %s' %
                                          (S.status.slot, S.prev.status.slot, l, S.status.iter))
                        self.recv(S.levels[l], S.prev.levels[l], tag=(l, S.status.iter, S.prev.status.slot))
                    self.hooks.post_comm(step=S, level_number=l)

                for S in local_MS_running:
                    self.hooks.pre_sweep(step=S, level_number=l)
                    S.levels[l].sweep.update_nodes()
                    S.levels[l].sweep.compute_residual()
                    self.hooks.post_sweep(step=S, level_number=l)

            for S in local_MS_running:
                # transfer further down the hierarchy
                S.transfer(source=S.levels[l], target=S.levels[l + 1])

        for S in local_MS_running:
            # update stage
            S.status.stage = 'IT_COARSE'

    def it_coarse(local_MS_running):
        """
        Coarse sweep

        Args:
            local_MS_running (list): list of currently running steps
        """

        for S in local_MS_running:

            # receive from previous step (if not first)
            self.hooks.pre_comm(step=S, level_number=len(S.levels) - 1)
            if not S.status.first and not S.status.prev_done:
                self.logger.debug('Process %2i receives from %2i on level %2i with tag %s' %
                                  (S.status.slot, S.prev.status.slot, len(S.levels) - 1, S.status.iter))
                self.recv(S.levels[-1], S.prev.levels[-1], tag=(len(S.levels), S.status.iter, S.prev.status.slot))
            self.hooks.post_comm(step=S, level_number=len(S.levels) - 1)

            # do the sweep
            self.hooks.pre_sweep(step=S, level_number=len(S.levels) - 1)
            S.levels[-1].sweep.update_nodes()
            S.levels[-1].sweep.compute_residual()
            self.hooks.post_sweep(step=S, level_number=len(S.levels) - 1)

            # send to succ step
            self.hooks.pre_comm(step=S, level_number=len(S.levels) - 1)
            if not S.status.last:
                self.logger.debug('Process %2i provides data on level %2i with tag %s' %
                                  (S.status.slot, len(S.levels) - 1, S.status.iter))
                self.send(S.levels[-1], tag=(len(S.levels), S.status.iter, S.status.slot))
            self.hooks.post_comm(step=S, level_number=len(S.levels) - 1, add_to_stats=True)

            # update stage
            if len(S.levels) > 1:  # MLSDC or PFASST
                S.status.stage = 'IT_UP'
            else:  # MSSDC
                S.status.stage = 'IT_CHECK'

    def it_up(local_MS_running):
        """
        Prolong corrections up to finest level (parallel)

        Args:
            local_MS_running (list): list of currently running steps
        """

        for l in range(self.nlevels - 1, 0, -1):

            for S in local_MS_running:
                # prolong values
                S.transfer(source=S.levels[l], target=S.levels[l - 1])

            # on middle levels: do communication and sweep as usual
            if l - 1 > 0:

                for k in range(self.nsweeps[l - 1]):

                    for S in local_MS_running:

                        # send updated values forward
                        self.hooks.pre_comm(step=S, level_number=l - 1)
                        if not S.status.last:
                            self.logger.debug('Process %2i provides data on level %2i with tag %s' %
                                              (S.status.slot, l - 1, S.status.iter))
                            self.send(S.levels[l - 1], tag=(l - 1, S.status.iter, S.status.slot))

                        # receive values
                        if not S.status.prev_done and not S.status.first:
                            self.logger.debug('Process %2i receives from %2i on level %2i with tag %s' %
                                              (S.status.slot, S.prev.status.slot, l - 1, S.status.iter))
                            self.recv(S.levels[l - 1], S.prev.levels[l - 1],
                                      tag=(l - 1, S.status.iter, S.prev.status.slot))
                        self.hooks.post_comm(step=S, level_number=l - 1,
                                             add_to_stats=(k == self.nsweeps[l - 1] - 1))

                    for S in local_MS_running:
                        self.hooks.pre_sweep(step=S, level_number=l - 1)
                        S.levels[l - 1].sweep.update_nodes()
                        S.levels[l - 1].sweep.compute_residual()
                        self.hooks.post_sweep(step=S, level_number=l - 1)

        for S in local_MS_running:
            # update stage
            S.status.stage = 'IT_FINE'

    def default(local_MS_running):
        """
        Default routine to catch wrong status

        Args:
            local_MS_running (list): list of currently running steps
        """
        raise ControllerError('Unknown stage, got %s' % local_MS_running[0].status.stage)  # TODO

    # steps which are DONE do not take part in the current stage
    # (strings have to be compared by value, identity of equal strings is not guaranteed)
    MS_running = [S for S in local_MS_active if S.status.stage != 'DONE']

    # nothing left to run: report the overall status instead of failing on an empty list
    if not MS_running:
        return all(S.status.done for S in local_MS_active)

    # if all stages are the same (or DONE), continue, otherwise abort
    stages = [S.status.stage for S in MS_running]
    if stages[1:] == stages[:-1]:
        stage = stages[0]
    else:
        raise ControllerError('not all stages are equal')

    self.logger.debug(stage)

    # dispatch table: stage name -> routine working on all running steps
    switcher = {
        'SPREAD': spread,
        'PREDICT': predict,
        'IT_CHECK': it_check,
        'IT_FINE': it_fine,
        'IT_DOWN': it_down,
        'IT_COARSE': it_coarse,
        'IT_UP': it_up
    }

    switcher.get(stage, default)(MS_running)

    return all([S.status.done for S in local_MS_active])
def run(self, u0, t0, Tend):
    """
    Driver for the serial version of SDC, MSSDC, MLSDC and PFASST (virtual parallelism)

    The steps are processed block by block: all steps whose start time lies
    before Tend are iterated via self.pfasst until it reports completion, then
    they are shifted forward in time by one block and restarted with the end
    value of the last active step.

    Args:
        u0: initial values
        t0: starting time
        Tend: ending time

    Returns:
        end values on the finest level
        stats object containing statistics for each step, each level and each iteration

    Raises:
        ControllerError: if no step starts before Tend
    """

    # reset of statistics and some initializations
    uend = None
    num_procs = len(self.MS)
    self.hooks.reset_stats()

    # initial ordering of the steps: 0,1,...,Np-1
    slots = list(range(num_procs))

    # start time of each step: t0 plus the step sizes of all preceding steps
    step_times = [t0 + sum(self.MS[j].dt for j in range(p)) for p in slots]

    # a step is active as long as its start time is below Tend (up to round-off)
    cutoff = Tend - 10 * np.finfo(float).eps

    def flag_active():
        return [step_times[p] < cutoff for p in slots]

    def pick_active(flags):
        # keep only those slots whose step is still active
        return [p for p, is_active in zip(slots, flags) if is_active]

    active = flag_active()
    if not any(active):
        raise ControllerError('Nothing to do, check t0, dt and Tend.')

    active_slots = pick_active(active)

    # initialize block of steps with u0
    self.restart_block(active_slots, step_times, u0)

    self.hooks.post_setup(step=None, level_number=None)

    # call pre-run hook
    for S in self.MS:
        self.hooks.pre_run(step=S, level_number=0)

    # main loop: as long as at least one step is still active (time < Tend), do something
    while any(active):

        MS_active = [self.MS[p] for p in active_slots]

        # iterate on the current block until the controller reports completion
        while not self.pfasst(MS_active):
            pass

        # uend is uend of the last active step in the list
        last_slot = active_slots[-1]
        uend = self.MS[last_slot].levels[0].uend

        # move the steps of this block forward by one block length
        for p in active_slots:
            step_times[p] += num_procs * self.MS[p].dt

        # determine new set of active steps and compress slots accordingly
        active = flag_active()
        active_slots = pick_active(active)

        # restart active steps (reset all values and pass uend to u0)
        self.restart_block(active_slots, step_times, uend)

    # call post-run hook
    for S in self.MS:
        self.hooks.post_run(step=S, level_number=0)

    return uend, self.hooks.return_stats()
def pfasst(self, MS):
    """
    Main function including the stages of SDC, MLSDC and PFASST (the "controller")

    Performs exactly one stage for the whole block of active steps and moves every step on to
    its next stage. All steps have to be at the same stage when this routine is called.

    For the workflow of this controller, check out one of our PFASST talks

    Args:
        MS: all active steps

    Returns:
        all active steps

    Raises:
        ControllerError: if the steps are not all at the same stage or if the stage is unknown
    """

    # all steps must be at the very same stage: compare the values instead of merely testing
    # them for truthiness, otherwise steps which are out of sync would go unnoticed
    stage = MS[0].status.stage
    if any(S.status.stage != stage for S in MS):
        raise ControllerError('not all stages are equal')

    self.logger.debug(stage)

    def exchange(S, lvl):
        """
        Fine-level communication of step S on level lvl (only if fine_comm is switched on):
        provide own values to the next step, then fetch the values of the previous step
        """
        # send updated values forward
        if self.params.fine_comm and not S.status.last:
            self.logger.debug('Process %2i provides data on level %2i with tag %s' %
                              (S.status.slot, lvl, S.status.iter))
            self.send(S.levels[lvl], tag=(lvl, S.status.iter, S.status.slot))

        # receive values
        if self.params.fine_comm and not S.status.first:
            self.logger.debug('Process %2i receives from %2i on level %2i with tag %s' %
                              (S.status.slot, S.prev.status.slot, lvl, S.status.iter))
            self.recv(S.levels[lvl], S.prev.levels[lvl], tag=(lvl, S.status.iter, S.prev.status.slot))

    if stage == 'SPREAD':
        # (potentially) serial spreading phase
        for S in MS:

            # first stage: spread values
            self.hooks.pre_step(step=S, level_number=0)

            # call predictor from sweeper
            S.levels[0].sweep.predict()

            # update stage
            if len(S.levels) > 1 and self.params.predict:  # MLSDC or PFASST with predict
                S.status.stage = 'PREDICT'
            else:
                S.status.stage = 'IT_CHECK'

        return MS

    elif stage == 'PREDICT':
        # call predictor (serial)
        MS = self.predictor(MS)

        for S in MS:
            # update stage
            S.status.stage = 'IT_CHECK'

        return MS

    elif stage == 'IT_CHECK':
        # check whether to stop iterating (parallel)
        for S in MS:

            exchange(S, 0)

            S.levels[0].sweep.compute_residual()
            S.status.done = self.check_convergence(S)

            if S.status.iter > 0:
                self.hooks.post_iteration(step=S, level_number=0)

        # if not everyone is ready yet, keep doing stuff
        if not all(S.status.done for S in MS):

            for S in MS:
                # a single unconverged step keeps the whole block iterating
                S.status.done = False
                # increment iteration count here (and only here)
                S.status.iter += 1
                self.hooks.pre_iteration(step=S, level_number=0)
                if len(S.levels) > 1:  # MLSDC or PFASST
                    S.status.stage = 'IT_UP'
                else:  # SDC
                    S.status.stage = 'IT_FINE'

        else:
            # if everyone is ready, end
            for S in MS:
                S.levels[0].sweep.compute_end_point()
                self.hooks.post_step(step=S, level_number=0)
                S.status.stage = 'DONE'

        return MS

    elif stage == 'IT_FINE':
        # do fine sweep for all steps (virtually parallel)
        for S in MS:
            S.levels[0].status.sweep = 0

        for k in range(self.nsweeps[0]):

            for S in MS:
                S.levels[0].status.sweep += 1

            for S in MS:
                # standard sweep workflow: update nodes, compute residual, log progress
                self.hooks.pre_sweep(step=S, level_number=0)
                S.levels[0].sweep.update_nodes()

            for S in MS:
                exchange(S, 0)

                S.levels[0].sweep.compute_residual()
                self.hooks.post_sweep(step=S, level_number=0)

        for S in MS:
            # update stage
            S.status.stage = 'IT_CHECK'

        return MS

    elif stage == 'IT_UP':
        # go up the hierarchy from finest to coarsest level (parallel)
        for S in MS:
            S.transfer(source=S.levels[0], target=S.levels[1])

        for l in range(1, self.nlevels - 1):

            # sweep on middle levels (not on finest, not on coarsest, though)
            for k in range(self.nsweeps[l]):

                for S in MS:
                    self.hooks.pre_sweep(step=S, level_number=l)
                    S.levels[l].sweep.update_nodes()

                    exchange(S, l)

                    S.levels[l].sweep.compute_residual()
                    self.hooks.post_sweep(step=S, level_number=l)

            for S in MS:
                # transfer further up the hierarchy
                S.transfer(source=S.levels[l], target=S.levels[l + 1])

        for S in MS:
            # update stage
            S.status.stage = 'IT_COARSE'

        return MS

    elif stage == 'IT_COARSE':
        # sweeps on coarsest level (serial/blocking)
        for S in MS:

            # receive from previous step (if not first)
            # note: send and recv below both use len(S.levels) in the tag, while the log
            # messages report the level index len(S.levels) - 1
            if not S.status.first:
                self.logger.debug('Process %2i receives from %2i on level %2i with tag %s' %
                                  (S.status.slot, S.prev.status.slot, len(S.levels) - 1, S.status.iter))
                self.recv(S.levels[-1], S.prev.levels[-1],
                          tag=(len(S.levels), S.status.iter, S.prev.status.slot))

            # do the sweep
            self.hooks.pre_sweep(step=S, level_number=len(S.levels) - 1)
            S.levels[-1].sweep.update_nodes()
            S.levels[-1].sweep.compute_residual()
            self.hooks.post_sweep(step=S, level_number=len(S.levels) - 1)

            # send to succ step
            if not S.status.last:
                self.logger.debug('Process %2i provides data on level %2i with tag %s' %
                                  (S.status.slot, len(S.levels) - 1, S.status.iter))
                self.send(S.levels[-1], tag=(len(S.levels), S.status.iter, S.status.slot))

            # update stage
            if len(S.levels) > 1:  # MLSDC or PFASST
                S.status.stage = 'IT_DOWN'
            else:  # MSSDC
                S.status.stage = 'IT_CHECK'

        return MS

    elif stage == 'IT_DOWN':
        # prolong corrections down to finest level (parallel)
        for l in range(self.nlevels - 1, 0, -1):

            for S in MS:
                # prolong values
                S.transfer(source=S.levels[l], target=S.levels[l - 1])

                exchange(S, l - 1)

                S.levels[l - 1].sweep.compute_residual()

            # on middle levels: do communication and sweep as usual
            if l - 1 > 0:

                for k in range(self.nsweeps[l - 1]):

                    for S in MS:
                        self.hooks.pre_sweep(step=S, level_number=l - 1)
                        S.levels[l - 1].sweep.update_nodes()

                        exchange(S, l - 1)

                        S.levels[l - 1].sweep.compute_residual()
                        self.hooks.post_sweep(step=S, level_number=l - 1)

        # on finest level, first check for convergence (where we will communicate, too)
        for S in MS:
            # update stage
            S.status.stage = 'IT_FINE'

        return MS

    else:

        raise ControllerError('Unknown stage, got %s' % stage)
def pfasst(self, comm, num_procs):
    """
    Execute one stage of the SDC/MLSDC/PFASST workflow for the step held by this process

    The current stage is read from the status of self.S, the corresponding work is carried out and
    the stage is advanced, so that repeated calls walk through the whole iteration.
    For the workflow of this controller, check out one of our PFASST talks

    Args:
        comm: communicator
        num_procs: number of active processors (not used inside this routine)

    Raises:
        ControllerError: if the stage of the step is not known
    """

    step = self.S
    status = step.status
    stage = status.stage

    self.logger.debug(stage)

    if stage == 'SPREAD':
        # (potentially) serial spreading phase: spread values via the sweeper's predictor
        self.hooks.pre_step(step=step, level_number=0)
        step.levels[0].sweep.predict()

        # the predictor stage is only entered for MLSDC or PFASST with predict
        use_predictor = len(step.levels) > 1 and self.params.predict
        status.stage = 'PREDICT' if use_predictor else 'IT_CHECK'

    elif stage == 'PREDICT':
        # call predictor (serial)
        self.predictor(comm)

        # go through self.S again here, exactly as the stage update was written originally
        self.S.status.stage = 'IT_CHECK'

    elif stage == 'IT_CHECK':
        # check whether to stop iterating (parallel)
        finest = step.levels[0]

        # check for convergence or abort
        finest.sweep.compute_residual()
        status.done = self.check_convergence(step)

        if status.iter > 0:
            self.hooks.post_iteration(step=step, level_number=0)

        # an earlier status send may still be open, finish it first
        if self.req_status is not None:
            self.req_status.wait()

        # pass my own status on to the next process
        if not status.last:
            self.logger.debug('isend status: status %s, process %s, time %s, target %s, tag %s, iter %s' %
                              (status.done, status.slot, step.time, step.next, 99, status.iter))
            self.req_status = comm.isend(status.done, dest=step.next, tag=99)

        # fetch the status of the previous process, unless it is already known to be done
        if not status.first and not status.prev_done:
            status.prev_done = comm.recv(source=step.prev, tag=99)
            self.logger.debug('recv status: status %s, process %s, time %s, target %s, tag %s, iter %s' %
                              (status.prev_done, status.slot, step.time, step.next, 99, status.iter))
            status.done = status.done and status.prev_done

        if status.done:
            finest.sweep.compute_end_point()
            self.hooks.post_step(step=step, level_number=0)
            status.stage = 'DONE'
        else:
            # I'm not done or the guy left of me is not done: keep doing stuff
            # increment iteration count here (and only here)
            status.iter += 1
            self.hooks.pre_iteration(step=step, level_number=0)
            # more than one level: MLSDC or PFASST, otherwise plain SDC
            status.stage = 'IT_UP' if len(step.levels) > 1 else 'IT_FINE'

    elif stage == 'IT_FINE':
        # do fine sweep
        finest = step.levels[0]
        forward = not status.last and self.params.fine_comm

        # standard sweep workflow: update nodes, compute residual, log progress
        self.hooks.pre_sweep(step=step, level_number=0)
        for _ in range(finest.params.nsweeps):
            finest.sweep.update_nodes()
        finest.sweep.compute_residual()
        self.hooks.post_sweep(step=step, level_number=0)

        # wait for pending sends before computing uend, if any
        if len(self.req_send) > 0 and forward:
            self.req_send[0].wait()
        finest.sweep.compute_end_point()

        if forward:
            self.logger.debug('isend data: process %s, stage %s, time %s, target %s, tag %s, iter %s' %
                              (status.slot, status.stage, step.time, step.next, 0, status.iter))
            self.req_send.append(finest.uend.isend(dest=step.next, tag=0, comm=comm))

        status.stage = 'IT_CHECK'

    elif stage == 'IT_UP':
        # go up the hierarchy from finest to coarsest level (parallel)
        levels = step.levels
        step.transfer(source=levels[0], target=levels[1])

        # sweep and send on middle levels (not on finest, not on coarsest, though)
        for l in range(1, len(levels) - 1):
            middle = levels[l]

            self.hooks.pre_sweep(step=step, level_number=l)
            for _ in range(middle.params.nsweeps):
                middle.sweep.update_nodes()
            middle.sweep.compute_residual()
            self.hooks.post_sweep(step=step, level_number=l)

            # wait for pending sends before computing uend, if any
            if len(self.req_send) > l and not status.last and self.params.fine_comm:
                self.req_send[l].wait()
            middle.sweep.compute_end_point()

            if not status.last and self.params.fine_comm:
                self.logger.debug('isend data: process %s, stage %s, time %s, target %s, tag %s, iter %s' %
                                  (status.slot, status.stage, step.time, step.next, l, status.iter))
                self.req_send.append(middle.uend.isend(dest=step.next, tag=l, comm=comm))

            # transfer further up the hierarchy
            step.transfer(source=middle, target=levels[l + 1])

        status.stage = 'IT_COARSE_RECV'

    elif stage == 'IT_COARSE_RECV':
        # receive from previous step (if not first and if that one is still iterating)
        if not status.first and not status.prev_done:
            coarsest = len(step.levels) - 1
            self.logger.debug('recv data: process %s, stage %s, time %s, source %s, tag %s, iter %s' %
                              (status.slot, status.stage, step.time, step.prev, coarsest, status.iter))
            step.levels[-1].u[0].recv(source=step.prev, tag=coarsest, comm=comm)

        status.stage = 'IT_COARSE'

    elif stage == 'IT_COARSE':
        # sweeps on coarsest level (serial/blocking)
        coarsest = len(step.levels) - 1
        coarse = step.levels[-1]

        # do the sweep
        self.hooks.pre_sweep(step=step, level_number=coarsest)
        for _ in range(coarse.params.nsweeps):
            coarse.sweep.update_nodes()
        coarse.sweep.compute_residual()
        self.hooks.post_sweep(step=step, level_number=coarsest)
        coarse.sweep.compute_end_point()

        # send to next step
        if not status.last:
            self.logger.debug('send data: process %s, stage %s, time %s, target %s, tag %s, iter %s' %
                              (status.slot, status.stage, step.time, step.next, coarsest, status.iter))
            coarse.uend.send(dest=step.next, tag=coarsest, comm=comm)

        status.stage = 'IT_DOWN'

    elif stage == 'IT_DOWN':
        # prolong corrections down to finest level (parallel)
        levels = step.levels

        # receive and sweep on middle levels (except for coarsest level)
        for l in range(len(levels) - 1, 0, -1):
            finer = levels[l - 1]

            if not status.first and self.params.fine_comm and not status.prev_done:
                self.logger.debug('recv data: process %s, stage %s, time %s, source %s, tag %s, iter %s' %
                                  (status.slot, status.stage, step.time, step.prev, l - 1, status.iter))
                finer.u[0].recv(source=step.prev, tag=l - 1, comm=comm)

            # prolong values
            step.transfer(source=levels[l], target=finer)

            # on middle levels: do sweep as usual
            if l - 1 > 0:
                self.hooks.pre_sweep(step=step, level_number=l - 1)
                for _ in range(finer.params.nsweeps):
                    finer.sweep.update_nodes()
                finer.sweep.compute_residual()
                self.hooks.post_sweep(step=step, level_number=l - 1)

        status.stage = 'IT_FINE'

    else:

        raise ControllerError('Weird stage, got %s' % self.S.status.stage)
def default():
    """
    Fallback routine for a stage without a dedicated handler, always raises

    Raises:
        ControllerError: in any case, reporting the stage currently stored in the step
    """
    unknown_stage = self.S.status.stage
    raise ControllerError('Weird stage, got %s' % unknown_stage)
def run(self, u0, t0, Tend):
    """
    Main driver for running the parallel version of SDC, MSSDC, MLSDC and PFASST

    Every process holds one step (self.S). Processes whose step starts before Tend form the
    active communicator; after each block of steps the end value and end time are broadcast from
    the last active process, the start times are recomputed and the set of active processes is
    determined again.

    Args:
        u0: initial values
        t0: starting time
        Tend: ending time

    Returns:
        end values on the finest level
        stats object containing statistics for each step, each level and each iteration

    Raises:
        ControllerError: if no process has a step starting before Tend
    """

    # reset stats to prevent double entries from old runs
    self.hooks.reset_stats()

    # a step is active if it starts (up to round-off) before Tend
    t_limit = Tend - 10 * np.finfo(float).eps

    # find active processes and put into new communicator
    rank = self.comm.Get_rank()
    num_procs = self.comm.Get_size()
    all_dt = self.comm.allgather(self.S.dt)
    all_time = [t0 + sum(all_dt[0:i]) for i in range(num_procs)]
    time = all_time[rank]
    # all_time is a plain list: compare element by element, a list cannot be compared to a float
    all_active = [t < t_limit for t in all_time]

    if not any(all_active):
        raise ControllerError('Nothing to do, check t0, dt and Tend')

    active = all_active[rank]
    if not all(all_active):
        # separate active from inactive processes, rank and size refer to the new communicator
        comm_active = self.comm.Split(active)
        rank = comm_active.Get_rank()
        num_procs = comm_active.Get_size()
    else:
        comm_active = self.comm

    self.S.status.slot = rank

    # initialize block of steps with u0
    self.restart_block(num_procs, time, u0)

    uend = u0

    # call post-setup hook
    self.hooks.post_setup(step=None, level_number=None)

    # call pre-run hook
    self.hooks.pre_run(step=self.S, level_number=0)

    comm_active.Barrier()

    # while any process still active...
    while active:

        while not self.S.status.done:
            self.pfasst(comm_active, num_procs)

        time += self.S.dt

        # broadcast uend, set new times and find active processes
        tend = comm_active.bcast(time, root=num_procs - 1)
        uend = self.S.levels[0].uend.bcast(root=num_procs - 1, comm=comm_active)
        all_dt = comm_active.allgather(self.S.dt)
        all_time = [tend + sum(all_dt[0:i]) for i in range(num_procs)]
        time = all_time[rank]
        all_active = [t < t_limit for t in all_time]
        active = all_active[rank]
        if not all(all_active):
            comm_active = comm_active.Split(active)
            rank = comm_active.Get_rank()
            num_procs = comm_active.Get_size()
            self.S.status.slot = rank

        # initialize block of steps with uend
        self.restart_block(num_procs, time, uend)

    # call post-run hook
    self.hooks.post_run(step=self.S, level_number=0)

    # NOTE(review): if no Split ever happened, comm_active still is self.comm at this point,
    # so the original communicator is freed here -- confirm that this is intended
    comm_active.Free()

    return uend, self.hooks.return_stats()
def predict():
    """
    Predictor phase: depending on self.params.predict_type, provide initial values by sweeping
    on the fine and/or the coarsest level before the actual iteration starts
    """

    step = self.S
    ptype = self.params.predict_type

    self.hooks.pre_predict(step=step, level_number=0)

    if ptype is None:
        # no predictor requested, nothing to do
        pass

    elif ptype == 'fine_only':

        # do a fine sweep only
        step.levels[0].sweep.update_nodes()

    elif ptype == 'libpfasst_style':

        levels = step.levels

        # restrict to coarsest level
        for fine, coarse in zip(levels[:-1], levels[1:]):
            step.transfer(source=fine, target=coarse)

        coarsest = len(levels) - 1

        # fetch the values of the previous process (if any) on the coarsest level
        self.hooks.pre_comm(step=step, level_number=coarsest)
        if not step.status.first:
            self.logger.debug('recv data predict: process %s, stage %s, time, %s, source %s, tag %s' %
                              (step.status.slot, step.status.stage, step.time, step.prev, step.status.iter))
            self.recv(target=levels[-1], source=step.prev, tag=step.status.iter, comm=comm)
        self.hooks.post_comm(step=step, level_number=coarsest)

        # do the sweep with new values
        levels[-1].sweep.update_nodes()
        levels[-1].sweep.compute_end_point()

        # pass the new end value on to the next process (if any)
        self.hooks.pre_comm(step=step, level_number=coarsest)
        if not step.status.last:
            self.logger.debug('send data predict: process %s, stage %s, time, %s, target %s, tag %s' %
                              (step.status.slot, step.status.stage, step.time, step.next, step.status.iter))
            levels[-1].uend.isend(dest=step.next, tag=step.status.iter, comm=comm).Wait()
        self.hooks.post_comm(step=step, level_number=coarsest, add_to_stats=True)

        # go back to fine level, sweeping
        for src in range(coarsest, 0, -1):
            # prolong values
            step.transfer(source=levels[src], target=levels[src - 1])
            # on middle levels: do sweep as usual
            if src - 1 > 0:
                levels[src - 1].sweep.update_nodes()

        # end with a fine sweep
        levels[0].sweep.update_nodes()

    elif ptype == 'pfasst_burnin':

        levels = step.levels

        # restrict to coarsest level
        for fine, coarse in zip(levels[:-1], levels[1:]):
            step.transfer(source=fine, target=coarse)

        coarsest = len(levels) - 1

        # one phase per slot up to and including my own
        for phase in range(step.status.slot + 1):

            # nothing is received in the very first phase
            self.hooks.pre_comm(step=step, level_number=coarsest)
            if not phase == 0 and not step.status.first:
                self.logger.debug('recv data predict: process %s, stage %s, time, %s, source %s, tag %s, phase %s' %
                                  (step.status.slot, step.status.stage, step.time, step.prev,
                                   step.status.iter, phase))
                self.recv(target=levels[-1], source=step.prev, tag=step.status.iter, comm=comm)
            self.hooks.post_comm(step=step, level_number=coarsest)

            # do the sweep with new values
            levels[-1].sweep.update_nodes()
            levels[-1].sweep.compute_end_point()

            self.hooks.pre_comm(step=step, level_number=coarsest)
            if not step.status.last:
                self.logger.debug('send data predict: process %s, stage %s, time, %s, target %s, tag %s, phase %s' %
                                  (step.status.slot, step.status.stage, step.time, step.next,
                                   step.status.iter, phase))
                levels[-1].uend.isend(dest=step.next, tag=step.status.iter, comm=comm).Wait()
            # add_to_stats is only set in the last phase
            self.hooks.post_comm(step=step, level_number=coarsest,
                                 add_to_stats=(phase == step.status.slot))

        # interpolate back to finest level
        for src in range(coarsest, 0, -1):
            step.transfer(source=levels[src], target=levels[src - 1])

        # end this with a fine sweep
        levels[0].sweep.update_nodes()

    elif ptype == 'fmg':
        # TODO: implement FMG predictor
        raise NotImplementedError('FMG predictor is not yet implemented')

    else:
        raise ControllerError('Wrong predictor type, got %s' % self.params.predict_type)

    self.hooks.post_predict(step=self.S, level_number=0)

    # update stage
    self.S.status.stage = 'IT_CHECK'
def pfasst(self, S, num_procs):
    """
    Advance a single step by one stage of the SDC/MLSDC/PFASST workflow (the "controller")

    A stage which has to wait for data of a neighboring step leaves the stage untouched, so that
    the very same stage is tried again on the next call.
    For the workflow of this controller, check out one of our PFASST talks

    Args:
        S: currently active step
        num_procs: number of active processors

    Returns:
        updated step

    Raises:
        CommunicationError: if data on a fine or middle level cannot be sent or received
        ControllerError: if the stage is unknown or unclear
    """

    status = S.status

    # if S is done, stop right here
    if status.done:
        return S

    stage = status.stage

    self.logger.debug("Process %2i at stage %s" % (status.slot, stage))

    if stage == 'SPREAD':
        # first stage: spread values, using the predictor from the sweeper
        self.hooks.pre_step(step=S, level_number=0)
        S.levels[0].sweep.predict()

        if len(S.levels) > 1 and self.params.predict:
            # MLSDC or PFASST with predict
            status.stage = 'PREDICT_RESTRICT'
        else:
            # SDC
            status.stage = 'IT_CHECK'
        return S

    if stage == 'PREDICT_RESTRICT':
        # predictor (serial): go to coarsest level via transfer
        for fine_idx in range(len(S.levels) - 1):
            S.transfer(source=S.levels[fine_idx], target=S.levels[fine_idx + 1])

        status.stage = 'PREDICT_SWEEP'
        return S

    if stage == 'PREDICT_SWEEP':
        # do a (serial) sweep on coarsest level

        # receive new values from previous step (if not first step and if data is available)
        if not status.first and S.prev.levels[-1].tag:
            self.logger.debug('Process %2i receives from %2i on level %2i with tag %s -- PREDICT' %
                              (status.slot, S.prev.status.slot, len(S.levels) - 1, True))
            self.recv(S.levels[-1], S.prev.levels[-1])
            # reset tag to signal successful receive
            S.prev.levels[-1].tag = False

        # do the sweep with (possibly) new values
        S.levels[-1].sweep.update_nodes()

        status.stage = 'PREDICT_SEND'
        return S

    if stage == 'PREDICT_SEND':
        # send updated values on coarsest level

        if not status.last:
            if S.levels[-1].tag:
                # previous send has not been picked up yet: try again
                status.stage = 'PREDICT_SEND'
                return S
            self.logger.debug('Process %2i provides data on level %2i with tag %s -- PREDICT' %
                              (status.slot, len(S.levels) - 1, True))
            self.send(S.levels[-1], tag=True)

        # decrement counter to determine how many coarse sweeps are necessary
        status.pred_cnt -= 1

        status.stage = 'PREDICT_INTERP' if status.pred_cnt == 0 else 'PREDICT_SWEEP'
        return S

    if stage == 'PREDICT_INTERP':
        # prolong back to finest level
        for src in range(len(S.levels) - 1, 0, -1):
            S.transfer(source=S.levels[src], target=S.levels[src - 1])

        status.stage = 'IT_CHECK'
        return S

    if stage == 'IT_CHECK':
        # check whether to stop iterating
        S.levels[0].sweep.compute_residual()
        status.done = self.check_convergence(S)

        if status.iter > 0:
            self.hooks.post_iteration(step=S, level_number=0)

        # if the previous step is still iterating but I am done, un-do me to still forward values
        if not status.first and status.done and (S.prev.status.done is not None and not S.prev.status.done):
            status.done = False

        if not status.done:
            # increment iteration count here (and only here)
            status.iter += 1
            self.hooks.pre_iteration(step=S, level_number=0)
            # plain SDC goes directly to the fine sweep
            status.stage = 'IT_UP' if len(S.levels) > 1 else 'IT_FINE_SWEEP'
        else:
            # I am done, signal accordingly
            S.levels[0].sweep.compute_end_point()
            self.hooks.post_step(step=S, level_number=0)
            status.stage = 'DONE'
        return S

    if stage == 'IT_FINE_SWEEP':
        # do sweep on finest level
        finest = S.levels[0]

        # standard sweep workflow: update nodes, compute residual, log progress
        self.hooks.pre_sweep(step=S, level_number=0)
        for _ in range(finest.params.nsweeps):
            finest.sweep.update_nodes()
        finest.sweep.compute_residual()
        self.hooks.post_sweep(step=S, level_number=0)

        status.stage = 'IT_FINE_SEND'
        return S

    if stage == 'IT_FINE_SEND':
        # send forward values on finest level

        # if last send succeeded on this level or if last rank, send new values (otherwise: try again)
        if not S.levels[0].tag or status.last or S.next.status.done:
            if self.params.fine_comm:
                self.logger.debug('Process %2i provides data on level %2i with tag %s' %
                                  (status.slot, 0, True))
                self.send(S.levels[0], tag=True)
            status.stage = 'IT_CHECK'
        else:
            status.stage = 'IT_FINE_SEND'
        return S

    if stage == 'IT_UP':
        # go up the hierarchy from finest to coarsest level
        S.transfer(source=S.levels[0], target=S.levels[1])

        # sweep and send on middle levels (not on finest, not on coarsest, though)
        for lvl in range(1, len(S.levels) - 1):
            middle = S.levels[lvl]

            self.hooks.pre_sweep(step=S, level_number=lvl)
            for _ in range(middle.params.nsweeps):
                middle.sweep.update_nodes()
            middle.sweep.compute_residual()
            self.hooks.post_sweep(step=S, level_number=lvl)

            # sending requires that the last send on this level succeeded (otherwise: abort with error)
            if not (not middle.tag or status.last or S.next.status.done):
                raise CommunicationError('Sending failed on process %2i, level %2i' % (status.slot, lvl))
            if self.params.fine_comm:
                self.logger.debug('Process %2i provides data on level %2i with tag %s' %
                                  (status.slot, lvl, True))
                self.send(middle, tag=True)

            # transfer further up the hierarchy
            S.transfer(source=middle, target=S.levels[lvl + 1])

        status.stage = 'IT_COARSE_RECV'
        return S

    if stage == 'IT_COARSE_RECV':
        # receive on coarsest level

        # receiving is only possible/necessary if I am not the first in line and if the
        # previous step is not done yet
        expects_data = not status.first and not S.prev.status.done

        if expects_data and not S.prev.levels[-1].tag:
            # nothing there yet: try again
            status.stage = 'IT_COARSE_RECV'
            return S

        if expects_data:
            self.logger.debug('Process %2i receives from %2i on level %2i with tag %s' %
                              (status.slot, S.prev.status.slot, len(S.levels) - 1, True))
            self.recv(S.levels[-1], S.prev.levels[-1])
            S.prev.levels[-1].tag = False

        if len(S.levels) > 1 or num_procs > 1:
            status.stage = 'IT_COARSE_SWEEP'
        else:
            raise ControllerError('Stage unclear after coarse send')
        return S

    if stage == 'IT_COARSE_SWEEP':
        # coarsest sweep
        coarse = S.levels[-1]
        coarsest = len(S.levels) - 1

        # standard sweep workflow: update nodes, compute residual, log progress
        self.hooks.pre_sweep(step=S, level_number=coarsest)
        for _ in range(coarse.params.nsweeps):
            coarse.sweep.update_nodes()
        coarse.sweep.compute_residual()
        self.hooks.post_sweep(step=S, level_number=coarsest)

        status.stage = 'IT_COARSE_SEND'
        return S

    if stage == 'IT_COARSE_SEND':
        # send forward coarsest values

        # try to send new values (if old ones have not been picked up yet, retry)
        if not S.levels[-1].tag or status.last or S.next.status.done:
            self.logger.debug('Process %2i provides data on level %2i with tag %s' %
                              (status.slot, len(S.levels) - 1, True))
            self.send(S.levels[-1], tag=True)
            status.stage = 'IT_DOWN'
        else:
            status.stage = 'IT_COARSE_SEND'
        return S

    if stage == 'IT_DOWN':
        # prolong corrections down to finest level

        # receive and sweep on middle levels (except for coarsest level)
        for src in range(len(S.levels) - 1, 0, -1):
            dst = src - 1

            # if applicable, try to receive values from IT_UP, otherwise abort
            if self.params.fine_comm and not status.first and not S.prev.status.done:
                if not S.prev.levels[dst].tag:
                    raise CommunicationError('Sending failed during IT_DOWN')
                self.logger.debug('Process %2i receives from %2i on level %2i with tag %s' %
                                  (status.slot, S.prev.status.slot, dst, True))
                self.recv(S.levels[dst], S.prev.levels[dst])
                S.prev.levels[dst].tag = False

            # prolong values
            S.transfer(source=S.levels[src], target=S.levels[dst])

            # on middle levels: do sweep as usual
            if dst > 0:
                self.hooks.pre_sweep(step=S, level_number=dst)
                for _ in range(S.levels[dst].params.nsweeps):
                    S.levels[dst].sweep.update_nodes()
                S.levels[dst].sweep.compute_residual()
                self.hooks.post_sweep(step=S, level_number=dst)

        status.stage = 'IT_FINE_SWEEP'
        return S

    raise ControllerError('Unknown stage, got %s' % S.status.stage)