def define_correlation_tasks(all_conf, comm, size, rank):
    """Determine which correlation station pairs this MPI rank should compute.

    Builds the full list of station pairs, optionally filters out pairs with
    no observed correlation (distributed across ranks via MPI scatter/gather),
    removes already-computed pairs, and slices out this rank's contiguous
    chunk of the remaining work.

    NOTE(review): the MPI collective calls (scatter, gather, bcast) below must
    be reached by every rank in the same order — do not add early returns or
    rank-dependent branching around them.

    :param all_conf: configuration object carrying source_config, config,
        auto_corr, steplengthrun and step attributes
    :param comm: MPI communicator (e.g. MPI.COMM_WORLD)
    :param size: number of MPI ranks
    :param rank: this process' rank
    :returns: tuple (p_p, num_pairs, len(p)) — this rank's pair list, the
        per-rank chunk size, and the total number of pairs after filtering
    """
    # All candidate station pairs for this project (optionally incl. autocorrelations).
    p = define_correlationpairs(all_conf.source_config['project_path'],
                                all_conf.auto_corr)
    if rank == 0 and all_conf.config['verbose']:
        print('Nr of station pairs %g ' % len(p))

    # Remove pairs for which no observation is available
    obs_only = all_conf.source_config['model_observed_only']
    if obs_only:
        # Step-length test runs read observations from a separate directory.
        if all_conf.steplengthrun:
            directory = os.path.join(all_conf.source_config['source_path'],
                                     'observed_correlations_slt')
        else:
            directory = os.path.join(all_conf.source_config['source_path'],
                                     'observed_correlations')

        if rank == 0:
            # split p into size lists for comm.scatter()
            p_split = np.array_split(p, size)
            # np.array_split yields arrays; scatter plain lists instead.
            p_split = [k.tolist() for k in p_split]
        else:
            p_split = None

        # scatter p_split to ranks: each rank filters its own sublist so the
        # directory listing work is parallelized.
        p_split = comm.scatter(p_split, root=0)
        p_split = rem_no_obs(p_split, all_conf.source_config,
                             directory=directory)

        # gather all on rank 0
        p_new = comm.gather(list(p_split), root=0)

        # put all back into one array p (flatten the list of per-rank lists)
        if rank == 0:
            p = [i for j in p_new for i in j]
        # broadcast p to all ranks so every rank sees the filtered pair list
        p = comm.bcast(p, root=0)
        if rank == 0 and all_conf.config['verbose']:
            print('Nr station pairs after checking available observ. %g '
                  % len(p))

    # Remove pairs that have already been calculated
    p = rem_fin_prs(p, all_conf.source_config, all_conf.step)
    if rank == 0 and all_conf.config['verbose']:
        print('Nr station pairs after checking already calculated ones %g'
              % len(p))
        print(16 * '*')

    # The assignment of station pairs should be such that one core has as
    # many occurrences of the same station as possible;
    # this will prevent that many processes try to read from the same hdf5
    # file all at once.
    num_pairs = int(ceil(float(len(p)) / float(size)))
    # Contiguous slice for this rank; the last rank's slice may be shorter.
    p_p = p[rank * num_pairs: rank * num_pairs + num_pairs]

    return(p_p, num_pairs, len(p))
def run_corr(source_configfile, step, kernelrun=False, steplengthrun=False, ignore_network=False):
    """Embarrassingly-parallel computation of correlations (or kernels).

    Each MPI rank takes a contiguous chunk of the station-pair list and
    computes the correlation (or, with kernelrun=True, the sensitivity
    kernel) for each pair via g1g2_corr.

    :param source_configfile: path to the source_config JSON file
    :param step: inversion step number (converted to int)
    :param kernelrun: if True, compute kernels instead of correlations
    :param steplengthrun: if True, also filter against the step-length-test
        observation directory
    :param ignore_network: passed through to paths_input
    """
    # simple embarrassingly parallel run:
    comm = MPI.COMM_WORLD
    size = comm.Get_size()
    rank = comm.Get_rank()

    step = int(step)

    # ToDo think about that configuration decorator
    # Use a context manager so the config file handle is closed promptly
    # (the original json.load(open(...)) leaked the handle).
    with open(source_configfile) as conf_file:
        source_config = json.load(conf_file)
    obs_only = source_config['model_observed_only']

    p = define_correlationpairs(source_config['project_path'])
    if rank == 0:
        print('Nr all possible correlation pairs %g ' % len(p))

    # Remove pairs for which no observation is available
    if obs_only:
        directory = os.path.join(source_config['source_path'],
                                 'observed_correlations')
        p = rem_no_obs(p, source_config, directory=directory)
        if rank == 0:
            print('Nr correlation pairs after checking available observ. %g '
                  % len(p))

    if steplengthrun:
        directory = os.path.join(source_config['source_path'],
                                 'step_' + str(step), 'obs_slt')
        p = rem_no_obs(p, source_config, directory=directory)
        if rank == 0:
            print('Nr correlation pairs after checking available observ. %g '
                  % len(p))

    # Remove pairs that have already been calculated
    p = rem_fin_prs(p, source_config, step, kernelrun)
    if rank == 0:
        print('Nr correlation pairs after checking already calculated ones %g '
              % len(p))
        print(16 * '*')

    # The assignment of station pairs should be such that one core has as
    # many occurrences of the same station as possible;
    # this will prevent that many processes try to access the same hdf5
    # file all at once.
    num_pairs = int(ceil(float(len(p)) / float(size)))
    p_p = p[rank * num_pairs:rank * num_pairs + num_pairs]

    print('Rank number %g' % rank)
    print('working on pair nr. %g to %g of %g.'
          % (rank * num_pairs, rank * num_pairs + num_pairs, len(p)))

    for cp in p_p:
        try:
            wf1, wf2, src, adjt = paths_input(cp, source_config, step,
                                              kernelrun, ignore_network)
            print(wf1, wf2, src)
            kernel, corr = paths_output(cp, source_config, step)
        # Narrowed from a bare "except:" so KeyboardInterrupt/SystemExit
        # still abort the run; any other failure skips this pair.
        except Exception:
            print('Could not determine correlation for: %s\
\nCheck if wavefield .h5 file is available.' % cp)
            continue

        # Skip pairs whose output already exists for the requested mode.
        if os.path.exists(corr) and not kernelrun:
            continue
        if os.path.exists(kernel) and kernelrun:
            continue

        # BUGFIX: the original "continue" here only advanced the inner
        # adjoint-source loop (a no-op) instead of skipping the pair.
        # Track missing adjoint sources and skip the pair afterwards.
        adjt_missing = False
        for asr in adjt:
            if not os.path.exists(asr) and kernelrun:
                print('No adjoint source found for:')
                print(os.path.basename(asr))
                adjt_missing = True
        if adjt_missing:
            continue

        # if int(step) == 0:
        # if source_config['ktype'] == 'td':
        # print('Time domain preliminary kernel...')
        g1g2_corr(wf1, wf2, corr, kernel, adjt, src, source_config,
                  kernelrun=kernelrun)
def run_corr(source_configfile, step, steplengthrun=False, ignore_network=False):
    """Embarrassingly-parallel computation of correlations.

    Each MPI rank takes a contiguous chunk of the station-pair list and
    computes the correlation for each pair via g1g2_corr (optionally using
    instaseis wavefields).

    :param source_configfile: path to the source_config JSON file
    :param step: inversion step number (converted to int)
    :param steplengthrun: if True, filter against the step-length-test
        observation directory instead of the regular one
    :param ignore_network: passed through to paths_input
    :returns: empty tuple (kept for interface compatibility)
    """
    # simple embarrassingly parallel run:
    comm = MPI.COMM_WORLD
    size = comm.Get_size()
    rank = comm.Get_rank()

    step = int(step)

    # get configuration; context managers close the file handles that the
    # original json.load(open(...)) calls leaked.
    with open(source_configfile) as conf_file:
        source_config = json.load(conf_file)
    obs_only = source_config['model_observed_only']
    with open(os.path.join(source_config['project_path'],
                           'config.json')) as proj_file:
        insta = json.load(proj_file)['instaseis']

    # dict.get replaces the try/except KeyError boilerplate; False remains
    # the default when 'get_auto_corr' is absent.
    auto_corr = source_config.get('get_auto_corr', False)

    # get possible station pairs
    p = define_correlationpairs(source_config['project_path'],
                                auto_corr=auto_corr)
    if rank == 0:
        print('Nr all possible correlation pairs %g ' % len(p))

    # Remove pairs for which no observation is available
    if obs_only and not steplengthrun:
        directory = os.path.join(source_config['source_path'],
                                 'observed_correlations')
        p = rem_no_obs(p, source_config, directory=directory)
        if rank == 0:
            print('Nr correlation pairs after checking available observ. %g '
                  % len(p))
    if steplengthrun:
        directory = os.path.join(source_config['source_path'],
                                 'step_' + str(step), 'obs_slt')
        p = rem_no_obs(p, source_config, directory=directory)
        if rank == 0:
            print('Nr correlation pairs after checking available observ. %g '
                  % len(p))

    # Remove pairs that have already been calculated
    p = rem_fin_prs(p, source_config, step)
    if rank == 0:
        print('Nr correlation pairs after checking already calculated ones %g '
              % len(p))
        print(16 * '*')

    # The assignment of station pairs should be such that one core has as
    # many occurrences of the same station as possible;
    # this will prevent that many processes try to access the same hdf5
    # file all at once.
    num_pairs = int(ceil(float(len(p)) / float(size)))
    p_p = p[rank * num_pairs: rank * num_pairs + num_pairs]

    print('Rank number %g' % rank)
    print('working on pair nr. %g to %g of %g.'
          % (rank * num_pairs, rank * num_pairs + num_pairs, len(p)))

    for cp in p_p:
        # NOTE(review): a try/except used to wrap the path lookup to tolerate
        # occasional messups in the massively parallel loop (e.g. a wavefield
        # requested that isn't in the database); it is intentionally left off
        # here so error messages surface — restore it if robustness is
        # preferred over visibility.
        wf1, wf2, src = paths_input(cp, source_config,
                                    step, ignore_network, insta)
        print(wf1, wf2, src)
        corr = path_output(cp, source_config, step)
        print(corr)

        # Skip pairs whose correlation output already exists.
        if os.path.exists(corr):
            continue

        g1g2_corr(wf1, wf2, corr, src, source_config, insta=insta)
    return ()
def run_corr(source_configfile, step, steplengthrun=False,
             ignore_network=False):
    """Embarrassingly-parallel computation of correlations.

    Each MPI rank takes a contiguous chunk of the station-pair list and
    computes the correlation for each pair via g1g2_corr (optionally using
    instaseis wavefields).

    :param source_configfile: path to the source_config JSON file
    :param step: inversion step number (converted to int)
    :param steplengthrun: if True, filter against the step-length-test
        observation directory instead of the regular one
    :param ignore_network: passed through to paths_input
    :returns: empty tuple (kept for interface compatibility)
    """
    # simple embarrassingly parallel run:
    comm = MPI.COMM_WORLD
    size = comm.Get_size()
    rank = comm.Get_rank()

    step = int(step)

    # get configuration; context managers close the file handles that the
    # original json.load(open(...)) calls leaked.
    with open(source_configfile) as conf_file:
        source_config = json.load(conf_file)
    obs_only = source_config['model_observed_only']
    with open(os.path.join(source_config['project_path'],
                           'config.json')) as proj_file:
        insta = json.load(proj_file)['instaseis']

    # dict.get replaces the try/except KeyError boilerplate; False remains
    # the default when 'get_auto_corr' is absent.
    auto_corr = source_config.get('get_auto_corr', False)

    # get possible station pairs
    p = define_correlationpairs(source_config['project_path'],
                                auto_corr=auto_corr)
    if rank == 0:
        print('Nr all possible correlation pairs %g ' % len(p))

    # Remove pairs for which no observation is available
    if obs_only and not steplengthrun:
        directory = os.path.join(source_config['source_path'],
                                 'observed_correlations')
        p = rem_no_obs(p, source_config, directory=directory)
        if rank == 0:
            print('Nr correlation pairs after checking available observ. %g '
                  % len(p))
    if steplengthrun:
        directory = os.path.join(source_config['source_path'],
                                 'step_' + str(step), 'obs_slt')
        p = rem_no_obs(p, source_config, directory=directory)
        if rank == 0:
            print('Nr correlation pairs after checking available observ. %g '
                  % len(p))

    # Remove pairs that have already been calculated
    p = rem_fin_prs(p, source_config, step)
    if rank == 0:
        print(
            'Nr correlation pairs after checking already calculated ones %g '
            % len(p))
        print(16 * '*')

    # The assignment of station pairs should be such that one core has as
    # many occurrences of the same station as possible;
    # this will prevent that many processes try to access the same hdf5
    # file all at once.
    num_pairs = int(ceil(float(len(p)) / float(size)))
    p_p = p[rank * num_pairs:rank * num_pairs + num_pairs]

    print('Rank number %g' % rank)
    print('working on pair nr. %g to %g of %g.'
          % (rank * num_pairs, rank * num_pairs + num_pairs, len(p)))

    for cp in p_p:
        # NOTE(review): a try/except used to wrap the path lookup to tolerate
        # occasional messups in the massively parallel loop (e.g. a wavefield
        # requested that isn't in the database); it is intentionally left off
        # here so error messages surface — restore it if robustness is
        # preferred over visibility.
        wf1, wf2, src = paths_input(cp, source_config,
                                    step, ignore_network, insta)
        print(wf1, wf2, src)
        corr = path_output(cp, source_config, step)
        print(corr)

        # Skip pairs whose correlation output already exists.
        if os.path.exists(corr):
            continue

        g1g2_corr(wf1, wf2, corr, src, source_config, insta=insta)
    return ()