Exemplo n.º 1
0
def define_correlation_tasks(all_conf, comm, size, rank):
    """Build and distribute the list of station-pair correlation tasks.

    Enumerates all candidate station pairs, optionally removes pairs for
    which no observed correlation exists (the check is parallelized with
    an MPI scatter/gather round trip), removes pairs that were already
    computed, and finally slices out the contiguous chunk of pairs this
    rank is responsible for.

    :param all_conf: configuration object exposing ``source_config``,
        ``config``, ``auto_corr``, ``steplengthrun`` and ``step``
    :param comm: MPI communicator used for scatter/gather/bcast
    :param size: total number of MPI ranks
    :param rank: rank of the calling process
    :returns: tuple ``(pairs for this rank, max pairs per rank,
        total number of pairs)``
    """

    p = define_correlationpairs(all_conf.source_config
                                ['project_path'],
                                all_conf.auto_corr)
    if rank == 0 and all_conf.config['verbose']:
        print('Nr of station pairs %g ' % len(p))

    # Remove pairs for which no observation is available
    obs_only = all_conf.source_config['model_observed_only']
    if obs_only:
        # step-length test runs read observations from a dedicated directory
        if all_conf.steplengthrun:
            directory = os.path.join(all_conf.source_config['source_path'],
                                     'observed_correlations_slt')
        else:
            directory = os.path.join(all_conf.source_config['source_path'],
                                     'observed_correlations')
        if rank == 0:
            # split p into size lists for comm.scatter()
            p_split = np.array_split(p, size)
            p_split = [k.tolist() for k in p_split]
        else:
            p_split = None

        # scatter p_split to ranks; every rank then filters its own
        # sublist for available observations
        p_split = comm.scatter(p_split, root=0)
        p_split = rem_no_obs(p_split, all_conf.source_config,
                             directory=directory)

        # gather all on rank 0
        p_new = comm.gather(list(p_split), root=0)

        # put all back into one array p (flatten the list of sublists)
        if rank == 0:
            p = [i for j in p_new for i in j]

        # broadcast p to all ranks so everybody sees the filtered list
        p = comm.bcast(p, root=0)
        if rank == 0 and all_conf.config['verbose']:
            print('Nr station pairs after checking available observ. %g '
                  % len(p))

    # Remove pairs that have already been calculated
    p = rem_fin_prs(p, all_conf.source_config, all_conf.step)
    if rank == 0 and all_conf.config['verbose']:
        print('Nr station pairs after checking already calculated ones %g'
              % len(p))
        print(16 * '*')

    # The assignment of station pairs should be such that one core has as
    # many occurrences of the same station as possible;
    # this will prevent that many processes try to read from the same hdf5
    # file all at once.
    num_pairs = int(ceil(float(len(p)) / float(size)))

    # contiguous slice for this rank; may be shorter (or empty) for the
    # highest ranks when len(p) is not divisible by size
    p_p = p[rank * num_pairs: rank * num_pairs + num_pairs]

    return(p_p, num_pairs, len(p))
Exemplo n.º 2
0
def test_forward_model():
    """End-to-end test of the forward correlation model.

    Builds a minimal argument object, loads the v1 test project, computes
    the correlation for one station pair and compares it against a stored
    reference trace.
    """
    class args(object):
        def __init__(self):
            self.source_model = os.path.join('test', 'testdata_v1',
                                             'testsource_v1')
            self.step = 0
            # Bug fix: a trailing comma previously bound the truthy tuple
            # (False,) here, so any truthiness check on steplengthrun
            # downstream behaved as if it were True.
            self.steplengthrun = False
            self.ignore_network = True

    args = args()
    all_config = config_params(args, comm, size, rank)
    assert all_config.auto_corr

    # ns = (number of time samples, padded correlation length)
    ns = get_ns(all_config)
    assert (ns[0] == 3600)
    assert (ns[1] == 7200)

    p = define_correlationpairs(all_config.source_config['project_path'],
                                all_config.auto_corr)
    assert len(p) == 3
    assert p[0][0].split()[-1] == 'STA1'

    input_files = add_input_files(p[1], all_config)[0]
    assert os.path.basename(input_files[0]) == 'NET.STA1..MXZ.h5'

    nsrc = os.path.join('test', 'testdata_v1', 'testsource_v1', 'iteration_0',
                        'starting_model.h5')
    # use a one-sided taper: The seismogram probably has a non-zero end,
    # being cut off wherever the solver stopped running.
    taper = cosine_taper(ns[0], p=0.01)
    taper[0:ns[0] // 2] = 1.0
    correlation, sta1, sta2 = compute_correlation(input_files, all_config,
                                                  NoiseSource(nsrc), ns, taper)
    corr_saved = np.load(
        os.path.join('test', 'testdata_v1', 'testdata',
                     'NET.STA1..MXZ--NET.STA2..MXZ.npy'))

    # the computed correlation must match the stored reference
    assert np.allclose(correlation, corr_saved)
Exemplo n.º 3
0
def run_corr(source_configfile,
             step,
             kernelrun=False,
             steplengthrun=False,
             ignore_network=False):
    """Compute correlations (or sensitivity kernels) for all station pairs.

    Simple embarrassingly parallel MPI run: every rank processes a
    contiguous slice of the candidate pair list.

    :param source_configfile: path to the source-config JSON file
    :param step: iteration step number (cast to int)
    :param kernelrun: if True, compute kernels instead of correlations
    :param steplengthrun: if True, filter pairs against the step-length
        test observations
    :param ignore_network: forwarded to paths_input
    """

    comm = MPI.COMM_WORLD
    size = comm.Get_size()
    rank = comm.Get_rank()

    step = int(step)

    # ToDo think about that configuration decorator
    source_config = json.load(open(source_configfile))
    obs_only = source_config['model_observed_only']

    p = define_correlationpairs(source_config['project_path'])
    if rank == 0:
        print('Nr all possible correlation pairs %g ' % len(p))

    # Remove pairs for which no observation is available
    if obs_only:
        directory = os.path.join(source_config['source_path'],
                                 'observed_correlations')
        p = rem_no_obs(p, source_config, directory=directory)
        if rank == 0:
            print('Nr correlation pairs after checking available observ. %g ' %
                  len(p))
    if steplengthrun:
        directory = os.path.join(source_config['source_path'],
                                 'step_' + str(step), 'obs_slt')
        p = rem_no_obs(p, source_config, directory=directory)
        if rank == 0:
            print('Nr correlation pairs after checking available observ. %g ' %
                  len(p))

    # Remove pairs that have already been calculated
    p = rem_fin_prs(p, source_config, step, kernelrun)
    if rank == 0:
        print(
            'Nr correlation pairs after checking already calculated ones %g ' %
            len(p))
        print(16 * '*')

    # The assignment of station pairs should be such that one core has as
    # many occurrences of the same station as possible; this will prevent
    # that many processes try to access the same hdf5 file all at once.
    num_pairs = int(ceil(float(len(p)) / float(size)))
    p_p = p[rank * num_pairs:rank * num_pairs + num_pairs]

    print('Rank number %g' % rank)
    print('working on pair nr. %g to %g of %g.' %
          (rank * num_pairs, rank * num_pairs + num_pairs, len(p)))

    for cp in p_p:

        try:
            wf1, wf2, src, adjt = paths_input(cp, source_config, step,
                                              kernelrun, ignore_network)
            print(wf1, wf2, src)

            kernel, corr = paths_output(cp, source_config, step)

        # Bug fix: was a bare 'except:', which also swallowed
        # KeyboardInterrupt and SystemExit. Catching Exception keeps the
        # deliberate best-effort behaviour for the parallel loop without
        # masking interpreter-exit signals.
        except Exception:
            print('Could not determine correlation for: %s\
             \nCheck if wavefield .h5 file is available.' % cp)
            continue

        # skip work whose output already exists
        if os.path.exists(corr) and not kernelrun:
            continue

        if os.path.exists(kernel) and kernelrun:
            continue

        for asr in adjt:
            if not os.path.exists(asr) and kernelrun:
                print('No adjoint source found for:')
                print(os.path.basename(asr))
                # NOTE(review): this 'continue' only skips the current
                # adjoint source inside this inner loop; g1g2_corr below
                # still runs for the pair. Confirm this is intended.
                continue

        g1g2_corr(wf1,
                  wf2,
                  corr,
                  kernel,
                  adjt,
                  src,
                  source_config,
                  kernelrun=kernelrun)
Exemplo n.º 4
0
def run_corr(source_configfile, step, steplengthrun=False, ignore_network=False):
    """Embarrassingly parallel correlation computation over MPI ranks.

    Every rank takes a contiguous slice of the candidate station-pair
    list and computes the correlation for each pair in its slice.

    :param source_configfile: path to the source-config JSON file
    :param step: iteration step number (cast to int)
    :param steplengthrun: if True, filter pairs against the step-length
        test observations
    :param ignore_network: forwarded to paths_input
    :returns: empty tuple
    """
    comm = MPI.COMM_WORLD
    size = comm.Get_size()
    rank = comm.Get_rank()

    step = int(step)

    # read run configuration
    with open(source_configfile) as cfg_file:
        source_config = json.load(cfg_file)
    obs_only = source_config['model_observed_only']
    project_cfg = os.path.join(source_config['project_path'], 'config.json')
    with open(project_cfg) as cfg_file:
        insta = json.load(cfg_file)['instaseis']
    # older source configs may not define the auto-correlation switch
    auto_corr = source_config.get('get_auto_corr', False)

    # enumerate candidate station pairs
    p = define_correlationpairs(source_config['project_path'],
                                auto_corr=auto_corr)
    if rank == 0:
        print('Nr all possible correlation pairs %g ' % len(p))

    # drop pairs for which no observed correlation is available
    if obs_only and not steplengthrun:
        obs_dir = os.path.join(source_config['source_path'],
                               'observed_correlations')
        p = rem_no_obs(p, source_config, directory=obs_dir)
        if rank == 0:
            print('Nr correlation pairs after checking available observ. %g '
                  % len(p))
    if steplengthrun:
        obs_dir = os.path.join(source_config['source_path'],
                               'step_' + str(step), 'obs_slt')
        p = rem_no_obs(p, source_config, directory=obs_dir)
        if rank == 0:
            print('Nr correlation pairs after checking available observ. %g '
                  % len(p))

    # drop pairs whose correlation was already computed
    p = rem_fin_prs(p, source_config, step)
    if rank == 0:
        print('Nr correlation pairs after checking already calculated ones %g '
              % len(p))
        print(16 * '*')

    # Assign contiguous slices so that each core sees as many repeats of
    # the same station as possible -- this limits the number of processes
    # reading the same hdf5 wavefield file at once.
    chunk = int(ceil(float(len(p)) / float(size)))
    my_pairs = p[rank * chunk:rank * chunk + chunk]

    print('Rank number %g' % rank)
    print('working on pair nr. %g to %g of %g.'
          % (rank * chunk, rank * chunk + chunk, len(p)))

    for pair in my_pairs:

        # Path resolution used to be wrapped in try/except so that a
        # missing wavefield could not kill the whole parallel run;
        # re-wrap here if unknown failures need to be tolerated again.
        wf1, wf2, src = paths_input(pair, source_config,
                                    step, ignore_network, insta)
        print(wf1, wf2, src)
        corr = path_output(pair, source_config, step)
        print(corr)

        # output already exists: nothing to do for this pair
        if os.path.exists(corr):
            continue

        g1g2_corr(wf1, wf2, corr, src, source_config, insta=insta)

    return ()
Exemplo n.º 5
0
def run_corr(source_configfile,
             step,
             steplengthrun=False,
             ignore_network=False):
    """Compute correlations for all station pairs, parallel over MPI ranks.

    :param source_configfile: path to the source-config JSON file
    :param step: iteration step number (cast to int)
    :param steplengthrun: if True, filter pairs against the step-length
        test observations instead of the regular observed correlations
    :param ignore_network: forwarded to paths_input
    :returns: empty tuple
    """

    # simple embarrassingly parallel run:

    comm = MPI.COMM_WORLD
    size = comm.Get_size()
    rank = comm.Get_rank()

    step = int(step)

    # get configuration
    source_config = json.load(open(source_configfile))
    obs_only = source_config['model_observed_only']
    # project-level config decides whether wavefields come from instaseis
    insta = json.load(
        open(os.path.join(source_config['project_path'],
                          'config.json')))['instaseis']
    auto_corr = False  # default value
    # older source configs may not define 'get_auto_corr'
    try:
        auto_corr = source_config['get_auto_corr']
    except KeyError:
        pass

    # get possible station pairs
    p = define_correlationpairs(source_config['project_path'],
                                auto_corr=auto_corr)
    if rank == 0:
        print('Nr all possible correlation pairs %g ' % len(p))

    # Remove pairs for which no observation is available
    if obs_only and not steplengthrun:
        directory = os.path.join(source_config['source_path'],
                                 'observed_correlations')
        p = rem_no_obs(p, source_config, directory=directory)
        if rank == 0:
            print('Nr correlation pairs after checking available observ. %g ' %
                  len(p))
    if steplengthrun:
        # step-length test observations live in a per-step directory
        directory = os.path.join(source_config['source_path'],
                                 'step_' + str(step), 'obs_slt')
        p = rem_no_obs(p, source_config, directory=directory)
        if rank == 0:
            print('Nr correlation pairs after checking available observ. %g ' %
                  len(p))

    # Remove pairs that have already been calculated
    p = rem_fin_prs(p, source_config, step)
    if rank == 0:
        print(
            'Nr correlation pairs after checking already calculated ones %g ' %
            len(p))
        print(16 * '*')

    # The assignment of station pairs should be such that one core has as
    # many occurrences of the same station as possible;
    # this will prevent that many processes try to access the same hdf5
    # file all at once.
    num_pairs = int(ceil(float(len(p)) / float(size)))
    p_p = p[rank * num_pairs:rank * num_pairs + num_pairs]

    print('Rank number %g' % rank)
    print('working on pair nr. %g to %g of %g.' %
          (rank * num_pairs, rank * num_pairs + num_pairs, len(p)))

    for cp in p_p:

        # try except is used here because of the massively parallel loop.
        # it needs to tolerate a couple of messups (e.g. a wavefield is
        # requested that isn't in the database)
        # if unknown errors occur and no correlations are computed, comment try-
        # except to see the error messages.
        #try:
        wf1, wf2, src = paths_input(cp, source_config, step, ignore_network,
                                    insta)
        print(wf1, wf2, src)
        corr = path_output(cp, source_config, step)
        print(corr)
        #except:
        #   print('Could not determine correlation for: %s\
        #   \nCheck if wavefield .h5 file is available.' %cp)
        # continue

        # skip pairs whose correlation file already exists
        if os.path.exists(corr):
            continue

        g1g2_corr(wf1, wf2, corr, src, source_config, insta=insta)

    return ()