Example #1
    def train_on_files(self,
                       shot_files,
                       use_shots,
                       all_machines,
                       verbose=False):
        conf = self.conf
        all_signals = conf['paths']['all_signals']
        shot_list = ShotList()
        shot_list.load_from_shot_list_files_objects(shot_files, all_signals)
        shot_list_picked = shot_list.random_sublist(use_shots)

        previously_saved, machines_saved = self.previously_saved_stats()
        machines_to_compute = all_machines - machines_saved
        recompute = conf['data']['recompute_normalization']
        if recompute:
            machines_to_compute = all_machines
            previously_saved = False
        if not previously_saved or len(machines_to_compute) > 0:
            if previously_saved:
                self.load_stats(verbose=True)
            print('computing normalization for machines {}'.format(
                machines_to_compute))
            use_cores = max(1, mp.cpu_count() - 2)
            pool = mp.Pool(use_cores)
            print('running in parallel on {} processes'.format(
                pool._processes))
            start_time = time.time()

            for (i, stats) in enumerate(
                    pool.imap_unordered(self.train_on_single_shot,
                                        shot_list_picked)):
                # serial alternative:
                # for (i, stats) in enumerate(
                #         map(self.train_on_single_shot, shot_list_picked)):
                if stats.machine in machines_to_compute:
                    self.incorporate_stats(stats)
                    self.machines.add(stats.machine)
                sys.stdout.write('\r' +
                                 '{}/{}'.format(i + 1, len(shot_list_picked)))
            pool.close()
            pool.join()
            print('\nFinished Training Normalizer on '
                  '{} files in {} seconds'.format(len(shot_list_picked),
                                                  time.time() - start_time))
            self.save_stats(verbose=True)
        else:
            self.load_stats(verbose=verbose)
        # print representation of trained Normalizer to stdout:
        # Machine, NormalizerName, per-signal normalization stats/params
        if verbose:
            g.print_unique(self)
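
A minimal, self-contained sketch of the mp.Pool.imap_unordered pattern used above; compute_stats is a toy stand-in for train_on_single_shot, which must be picklable (a plain function or a bound method of a picklable object):

import multiprocessing as mp
import sys

def compute_stats(x):
    # toy per-item work standing in for per-shot statistics
    return x * x

if __name__ == '__main__':
    items = list(range(10))
    use_cores = max(1, mp.cpu_count() - 2)
    with mp.Pool(use_cores) as pool:
        # results arrive in completion order, not submission order
        for i, stats in enumerate(pool.imap_unordered(compute_stats, items)):
            sys.stdout.write('\r{}/{}'.format(i + 1, len(items)))
    print()
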
Example #2
def guarantee_preprocessed(conf, verbose=False):
    pp = Preprocessor(conf)
    if pp.all_are_preprocessed():
        if verbose:
            g.print_unique("shots already processed.")
        (shot_list_train, shot_list_validate,
         shot_list_test) = pp.load_shotlists()
    else:
        if verbose:
            g.print_unique("preprocessing all shots...")  # , end='')
        pp.clean_shot_lists()
        shot_list = pp.preprocess_all()
        shot_list.sort()
        shot_list_train, shot_list_test = shot_list.split_train_test(conf)
        # num_shots = len(shot_list_train) + len(shot_list_test)
        validation_frac = conf['training']['validation_frac']
        if validation_frac <= 0.05:
            if verbose:
                g.print_unique('Setting validation to a minimum of 0.05')
            validation_frac = 0.05
        shot_list_train, shot_list_validate = shot_list_train.split_direct(
            1.0 - validation_frac, do_shuffle=True)
        pp.save_shotlists(shot_list_train, shot_list_validate, shot_list_test)
    shot_list_train, shot_list_validate, shot_list_test = apply_bleed_in(
        conf, shot_list_train, shot_list_validate, shot_list_test)
    if verbose:
        print_shot_list_sizes(shot_list_train, shot_list_validate,
                              shot_list_test)
        g.print_unique("...done")
    #    g.print_unique("...printing test shot list:")
    #    for s in shot_list_test:
    #       g.print_unique(str(s.number))
    return shot_list_train, shot_list_validate, shot_list_test
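
The train/validate split above goes through ShotList.split_direct. A toy stand-in, under the assumption that it performs a shuffled fractional split (plain lists here, not ShotList objects; split_direct_sketch is hypothetical):

import random

def split_direct_sketch(items, frac, do_shuffle=True, seed=0):
    # hypothetical stand-in for ShotList.split_direct: the first return
    # value receives ~frac of the items, the second the remainder
    items = list(items)
    if do_shuffle:
        random.Random(seed).shuffle(items)
    cut = int(round(frac * len(items)))
    return items[:cut], items[cut:]

train, validate = split_direct_sketch(range(100), 1.0 - 0.05)
print(len(train), len(validate))  # 95 5
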
Example #3
def fetch_jet_data(signal_path, shot_num, c):
    found = False
    time = np.array([0])
    ydata = None
    data = np.array([0])
    try:
        data = c.get('_sig=jet("{}/",{})'.format(signal_path, shot_num)).data()
        if np.ndim(data) == 2:
            data = np.transpose(data)
            time = c.get('_sig=dim_of(jet("{}/",{}),1)'.format(
                signal_path, shot_num)).data()
            ydata = c.get('_sig=dim_of(jet("{}/",{}),0)'.format(
                signal_path, shot_num)).data()
        else:
            time = c.get('_sig=dim_of(jet("{}/",{}))'.format(
                signal_path, shot_num)).data()
        found = True
    except Exception as e:
        g.print_unique(e)
        sys.stdout.flush()
        # pass
    return time, data, ydata, found
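
A hedged usage sketch: MDSplus.Connection and Connection.get are real MDSplus Python APIs, but the server host, signal path, and shot number below are placeholders, not values taken from this project:

import MDSplus
import numpy as np

c = MDSplus.Connection('mdsplus.jet.efda.org')  # placeholder host
time, data, ydata, found = fetch_jet_data('ppf/magn/ipla', 80000, c)
if found:
    # ydata is None for 1-D signals; for 2-D profiles the axes are
    # (time, ydata) after the transpose inside fetch_jet_data
    print(np.shape(time), np.shape(data))
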
Example #4
def print_shot_list_sizes(shot_list_train,
                          shot_list_validate,
                          shot_list_test=None):
    nshots = len(shot_list_train) + len(shot_list_validate)
    nshots_disrupt = (shot_list_train.num_disruptive() +
                      shot_list_validate.num_disruptive())
    if shot_list_test is not None:
        nshots += len(shot_list_test)
        nshots_disrupt += shot_list_test.num_disruptive()
    g.print_unique('total: {} shots, {} disruptive'.format(
        nshots, nshots_disrupt))
    g.print_unique('training: {} shots, {} disruptive'.format(
        len(shot_list_train), shot_list_train.num_disruptive()))
    g.print_unique('validate: {} shots, {} disruptive'.format(
        len(shot_list_validate), shot_list_validate.num_disruptive()))
    if shot_list_test is not None:
        g.print_unique('testing: {} shots, {} disruptive'.format(
            len(shot_list_test), shot_list_test.num_disruptive()))
    return
Example #5
    from plasma.preprocessor.normalize import (AveragingVarNormalizer as
                                               Normalizer)
else:
    print('unknown normalizer. exiting')
    exit(1)

# set PRNG seed, unique for each worker, based on MPI task index for
# reproducible shuffling in guarantee_preprocessed() and training steps
np.random.seed(g.task_index)
random.seed(g.task_index)

only_predict = len(sys.argv) > 1
custom_path = None
if only_predict:
    custom_path = sys.argv[1]
    g.print_unique("predicting using path {}".format(custom_path))

#####################################################
#                 NORMALIZATION                     #
#####################################################
normalizer = Normalizer(conf)
if g.task_index == 0:
    # make sure preprocessing has been run, and results are saved to files
    # if not, only master MPI rank spawns thread pool to perform preprocessing
    (shot_list_train, shot_list_validate,
     shot_list_test) = guarantee_preprocessed(conf)
    # similarly, train normalizer (if necessary) w/ master MPI rank only
    normalizer.train()  # verbose=False only suppresses if purely loading
g.comm.Barrier()
g.print_unique("begin preprocessor+normalization (all MPI ranks)...")
# second call has ALL MPI ranks load preprocessed shots from .npz files
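
For reference, a minimal sketch of the per-rank seeding idiom above using mpi4py directly (the snippet reaches the rank through the project's globals module as g.task_index):

from mpi4py import MPI
import numpy as np
import random

# each MPI rank gets a distinct but reproducible PRNG stream
task_index = MPI.COMM_WORLD.Get_rank()
np.random.seed(task_index)
random.seed(task_index)
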
Example #6
def parameters(input_file):
    """Parse yaml file of configuration parameters."""
    # TODO(KGF): the following line imports TensorFlow as a Keras backend
    # by default (absent env variable KERAS_BACKEND and/or config file
    # $HOME/.keras/keras.json) "from plasma.conf import conf"
    # via "import keras.backend as K" in targets.py
    from plasma.models.targets import (HingeTarget, MaxHingeTarget,
                                       BinaryTarget, TTDTarget, TTDInvTarget,
                                       TTDLinearTarget)
    with open(input_file, 'r') as yaml_file:
        params = yaml.load(yaml_file, Loader=yaml.SafeLoader)
        params['user_name'] = getpass.getuser()
        output_path = params['fs_path'] + "/" + params['user_name']
        base_path = output_path

        params['paths']['base_path'] = base_path
        if isinstance(params['paths']['signal_prepath'], list):
            print('reading from multiple data folders!')

            params['paths']['signal_prepath'] = [
                base_path + s for s in params['paths']['signal_prepath']
            ]
        else:
            params['paths']['signal_prepath'] = base_path + params['paths'][
                'signal_prepath']
        params['paths']['shot_list_dir'] = (base_path +
                                            params['paths']['shot_list_dir'])
        params['paths']['output_path'] = output_path
        if params['paths']['data'] == 'd3d_data_gar18':
            h = myhash_signals(sig.all_signals_gar18.values())
        elif params['paths']['data'] == 'd3d_data_n1rms':
            h = myhash_signals(sig.all_signals_n1rms.values())
        elif params['paths']['data'] == 'd3d_data_garbage':
            h = myhash_signals(sig.all_signals_gar18.values()) * 2
        elif params['paths']['data'] == 'd3d_data_real_time':
            h = myhash_signals(sig.all_signals_real_time.values())
        elif params['paths']['data'] == 'd3d_data_real_time_0D':
            h = myhash_signals(sig.all_signals_real_time_0D.values())
        else:
            h = myhash_signals(sig.all_signals.values())
        params['paths']['global_normalizer_path'] = (
            output_path +
            '/normalization/normalization_signal_group_{}.npz'.format(h))
        if params['training']['hyperparam_tuning']:
            # params['paths']['saved_shotlist_path'] =
            # './normalization/shot_lists.npz'
            params['paths']['normalizer_path'] = (
                './normalization/normalization_signal_group_{}.npz'.format(h))
            params['paths']['model_save_path'] = './model_checkpoints/'
            params['paths']['csvlog_save_path'] = './csv_logs/'
            params['paths']['results_prepath'] = './results/'
        else:
            # params['paths']['saved_shotlist_path'] = output_path +
            # '/normalization/shot_lists.npz'
            params['paths']['normalizer_path'] = (
                params['paths']['global_normalizer_path'])
            params['paths']['model_save_path'] = (output_path +
                                                  '/model_checkpoints/')
            params['paths']['csvlog_save_path'] = output_path + '/csv_logs/'
            params['paths']['results_prepath'] = output_path + '/results/'
        params['paths']['tensorboard_save_path'] = (
            output_path + params['paths']['tensorboard_save_path'])
        params['paths']['saved_shotlist_path'] = (
            params['paths']['base_path'] + '/processed_shotlists/' +
            params['paths']['data'] +
            '/shot_lists_signal_group_{}.npz'.format(h))
        params['paths']['processed_prepath'] = (output_path +
                                                '/processed_shots/' +
                                                'signal_group_{}/'.format(h))
        # ensure shallow model has +1 -1 target.
        if params['model']['shallow'] or params['target'] == 'hinge':
            params['data']['target'] = HingeTarget
        elif params['target'] == 'maxhinge':
            MaxHingeTarget.fac = params['data']['positive_example_penalty']
            params['data']['target'] = MaxHingeTarget
        elif params['target'] == 'binary':
            params['data']['target'] = BinaryTarget
        elif params['target'] == 'ttd':
            params['data']['target'] = TTDTarget
        elif params['target'] == 'ttdinv':
            params['data']['target'] = TTDInvTarget
        elif params['target'] == 'ttdlinear':
            params['data']['target'] = TTDLinearTarget
        else:
            g.print_unique('Unknown type of target. Exiting')
            exit(1)

        # params['model']['output_activation'] =
        # params['data']['target'].activation
        # binary crossentropy performs slightly better?
        # params['model']['loss'] = params['data']['target'].loss

        # signals
        if params['paths']['data'] in ['d3d_data_gar18', 'd3d_data_garbage']:
            params['paths']['all_signals_dict'] = sig.all_signals_gar18
        elif params['paths']['data'] == 'd3d_data_n1rms':
            params['paths']['all_signals_dict'] = sig.all_signals_n1rms
        elif params['paths']['data'] == 'd3d_data_real_time':
            params['paths']['all_signals_dict'] = sig.all_signals_real_time
        elif params['paths']['data'] == 'd3d_data_real_time_0D':
            params['paths']['all_signals_dict'] = sig.all_signals_real_time_0D

        else:
            params['paths']['all_signals_dict'] = sig.all_signals

        # assert order
        # q95, li, ip, lm, betan, energy, dens, pradcore, pradedge, pin,
        # pechin, torquein, ipdirect, etemp_profile, edens_profile

        # shot lists
        jet_carbon_wall = ShotListFiles(sig.jet,
                                        params['paths']['shot_list_dir'],
                                        ['CWall_clear.txt', 'CFC_unint.txt'],
                                        'jet carbon wall data')
        jet_iterlike_wall = ShotListFiles(
            sig.jet, params['paths']['shot_list_dir'],
            ['ILW_unint.txt', 'BeWall_clear.txt'], 'jet iter like wall data')
        jet_iterlike_wall_late = ShotListFiles(
            sig.jet, params['paths']['shot_list_dir'],
            ['ILW_unint_late.txt', 'ILW_clear_late.txt'],
            'Late jet iter like wall data')
        # jet_iterlike_wall_full = ShotListFiles(
        #     sig.jet, params['paths']['shot_list_dir'],
        #     ['ILW_unint_full.txt', 'ILW_clear_full.txt'],
        #     'Full jet iter like wall data')

        jenkins_jet_carbon_wall = ShotListFiles(
            sig.jet, params['paths']['shot_list_dir'],
            ['jenkins_CWall_clear.txt', 'jenkins_CFC_unint.txt'],
            'Subset of jet carbon wall data for Jenkins tests')
        jenkins_jet_iterlike_wall = ShotListFiles(
            sig.jet, params['paths']['shot_list_dir'],
            ['jenkins_ILW_unint.txt', 'jenkins_BeWall_clear.txt'],
            'Subset of jet iter like wall data for Jenkins tests')

        jet_full = ShotListFiles(sig.jet, params['paths']['shot_list_dir'], [
            'ILW_unint.txt', 'BeWall_clear.txt', 'CWall_clear.txt',
            'CFC_unint.txt'
        ], 'jet full data')

        # d3d_10000 = ShotListFiles(
        #     sig.d3d, params['paths']['shot_list_dir'],
        #     ['d3d_clear_10000.txt', 'd3d_disrupt_10000.txt'],
        #     'd3d data 10000 ND and D shots')
        # d3d_1000 = ShotListFiles(
        #     sig.d3d, params['paths']['shot_list_dir'],
        #     ['d3d_clear_1000.txt', 'd3d_disrupt_1000.txt'],
        #     'd3d data 1000 ND and D shots')
        # d3d_100 = ShotListFiles(
        #     sig.d3d, params['paths']['shot_list_dir'],
        #     ['d3d_clear_100.txt', 'd3d_disrupt_100.txt'],
        #     'd3d data 100 ND and D shots')
        d3d_full = ShotListFiles(
            sig.d3d, params['paths']['shot_list_dir'],
            ['d3d_clear_data_avail.txt', 'd3d_disrupt_data_avail.txt'],
            'd3d data since shot 125500')
        d3d_full_new = ShotListFiles(
            sig.d3d, params['paths']['shot_list_dir'],
            ['shots_since_2016_clear.txt', 'shots_since_2016_disrupt.txt'],
            'd3d data since 2016')
        d3d_jenkins = ShotListFiles(
            sig.d3d, params['paths']['shot_list_dir'],
            ['jenkins_d3d_clear.txt', 'jenkins_d3d_disrupt.txt'],
            'Subset of d3d data for Jenkins test')
        # d3d_jb_full = ShotListFiles(
        #     sig.d3d, params['paths']['shot_list_dir'],
        #     ['shotlist_JaysonBarr_clear.txt',
        #      'shotlist_JaysonBarr_disrupt.txt'],
        #     'd3d shots since 160000-170000')

        # nstx_full = ShotListFiles(
        #     nstx, params['paths']['shot_list_dir'],
        #     ['disrupt_nstx.txt'], 'nstx shots (all are disruptive)')

        if params['paths']['data'] == 'jet_data':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.jet_signals
        elif params['paths']['data'] == 'jet_data_0D':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.jet_signals_0D
        elif params['paths']['data'] == 'jet_data_1D':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.jet_signals_1D
        elif params['paths']['data'] == 'jet_data_late':
            params['paths']['shot_files'] = [jet_iterlike_wall_late]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = sig.jet_signals
        elif params['paths']['data'] == 'jet_data_carbon_to_late_0D':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall_late]
            params['paths']['use_signals_dict'] = sig.jet_signals_0D
        elif params['paths']['data'] == 'jet_data_temp_profile':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = {
                'etemp_profile': sig.etemp_profile
            }
        elif params['paths']['data'] == 'jet_data_dens_profile':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = {
                'edens_profile': sig.edens_profile
            }
        elif params['paths']['data'] == 'jet_carbon_data':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = sig.jet_signals
        elif params['paths']['data'] == 'jet_mixed_data':
            params['paths']['shot_files'] = [jet_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = sig.jet_signals
        elif params['paths']['data'] == 'jenkins_jet':
            params['paths']['shot_files'] = [jenkins_jet_carbon_wall]
            params['paths']['shot_files_test'] = [jenkins_jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.jet_signals
        # jet data but with fully defined signals
        elif params['paths']['data'] == 'jet_data_fully_defined':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals
        # jet data but with fully defined signals
        elif params['paths']['data'] == 'jet_data_fully_defined_0D':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals_0D

        elif params['paths']['data'] == 'd3d_data':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'q95': sig.q95,
                'li': sig.li,
                'ip': sig.ip,
                'lm': sig.lm,
                'betan': sig.betan,
                'energy': sig.energy,
                'dens': sig.dens,
                'pradcore': sig.pradcore,
                'pradedge': sig.pradedge,
                'pin': sig.pin,
                'torquein': sig.torquein,
                'ipdirect': sig.ipdirect,
                'iptarget': sig.iptarget,
                'iperr': sig.iperr,
                'etemp_profile': sig.etemp_profile,
                'edens_profile': sig.edens_profile,
            }
        elif params['paths']['data'] in ['d3d_data_gar18', 'd3d_data_garbage']:
            params['paths']['shot_files'] = [d3d_full_new]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'q95t': sig.q95t,
                'lit': sig.lit,
                'ipt': sig.ipt,
                'lmt': sig.lmt,
                'betant': sig.betant,
                'energyt': sig.energyt,
                'denst': sig.denst,
                'pradcoret': sig.pradcoret,
                'pradedget': sig.pradedget,
                'pint': sig.pint,
                'torqueint': sig.torqueint,
                'ipdirectt': sig.ipdirectt,
                'iptargett': sig.iptargett,
                'iperrt': sig.iperrt,
                'etemp_profilet': sig.etemp_profilet,
                'edens_profilet': sig.edens_profilet,
            }
        elif params['paths']['data'] in ['d3d_data_n1rms']:
            params['paths']['shot_files'] = [d3d_full_new]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'q95t': sig.q95,
                'lit': sig.li,
                'ipt': sig.ip,
                'lmt': sig.lm,
                'betant': sig.betan,
                'energyt': sig.energy,
                'denst': sig.dens,
                'pradcoret': sig.pradcore,
                'pradedget': sig.pradedge,
                'pint': sig.pin,
                'torqueint': sig.torquein,
                'ipdirectt': sig.ipdirect,
                'iptargett': sig.iptarget,
                'iperrt': sig.iperr,
                'etemp_profilet': sig.etemp_profile,
                'edens_profilet': sig.edens_profile,
                'n1_rms': sig.n1_rms,
            }

        elif params['paths']['data'] == 'd3d_data_new':
            params['paths']['shot_files'] = [d3d_full_new]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'q95': sig.q95,
                'li': sig.li,
                'ip': sig.ip,
                'lm': sig.lm,
                'betan': sig.betan,
                'energy': sig.energy,
                'dens': sig.dens,
                'pradcore': sig.pradcore,
                'pradedge': sig.pradedge,
                'pin': sig.pin,
                'torquein': sig.torquein,
                'ipdirect': sig.ipdirect,
                'iptarget': sig.iptarget,
                'iperr': sig.iperr,
                'etemp_profile': sig.etemp_profile,
                'edens_profile': sig.edens_profile,
            }
        elif params['paths']['data'] == 'd3d_data_real_time':
            params['paths']['shot_files'] = [d3d_full_new]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'q95_EFITRT1': sig.q95_EFITRT1,
                'li': sig.li,
                'ip': sig.ip,
                'lm': sig.lm,
                'betan': sig.betan,
                'energy': sig.energy,
                'dens': sig.dens,
                'pradcore': sig.pradcore,
                'pradedge': sig.pradedge,
                'pin': sig.pin,
                'torquein': sig.torquein,
                'ipdirect': sig.ipdirect,
                'iptarget': sig.iptarget,
                'iperr': sig.iperr,
                'etemp_profile': sig.etemp_profile,
                'edens_profile': sig.edens_profile,
            }
        elif params['paths']['data'] == 'd3d_data_real_time_0D':
            params['paths']['shot_files'] = [d3d_full_new]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'q95_EFITRT1': sig.q95_EFITRT1,
                'li': sig.li,
                'ip': sig.ip,
                'lm': sig.lm,
                'betan': sig.betan,
                'energy': sig.energy,
                'dens': sig.dens,
                'pradcore': sig.pradcore,
                'pradedge': sig.pradedge,
                'pin': sig.pin,
                'vd': sig.vd,
                'torquein': sig.torquein,
                'ipdirect': sig.ipdirect,
                'iptarget': sig.iptarget,
            }

        elif params['paths']['data'] == 'd3d_data_1D':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'ipdirect': sig.ipdirect,
                'etemp_profile': sig.etemp_profile,
                'edens_profile': sig.edens_profile,
            }
        elif params['paths']['data'] == 'd3d_data_all_profiles':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'ipdirect': sig.ipdirect,
                'etemp_profile': sig.etemp_profile,
                'edens_profile': sig.edens_profile,
                'itemp_profile': sig.itemp_profile,
                'zdens_profile': sig.zdens_profile,
                'trot_profile': sig.trot_profile,
                'pthm_profile': sig.pthm_profile,
                'neut_profile': sig.neut_profile,
                'q_profile': sig.q_profile,
                'bootstrap_current_profile': sig.bootstrap_current_profile,
                'q_psi_profile': sig.q_psi_profile,
            }
        elif params['paths']['data'] == 'd3d_data_0D':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'q95': sig.q95,
                'li': sig.li,
                'ip': sig.ip,
                'lm': sig.lm,
                'betan': sig.betan,
                'energy': sig.energy,
                'dens': sig.dens,
                'pradcore': sig.pradcore,
                'pradedge': sig.pradedge,
                'pin': sig.pin,
                'torquein': sig.torquein,
                'ipdirect': sig.ipdirect,
                'iptarget': sig.iptarget,
                'iperr': sig.iperr,
            }
        elif params['paths']['data'] == 'd3d_data_all':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = sig.d3d_signals
        elif params['paths']['data'] == 'jenkins_d3d':
            params['paths']['shot_files'] = [d3d_jenkins]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'q95': sig.q95,
                'li': sig.li,
                'ip': sig.ip,
                'lm': sig.lm,
                'betan': sig.betan,
                'energy': sig.energy,
                'dens': sig.dens,
                'pradcore': sig.pradcore,
                'pradedge': sig.pradedge,
                'pin': sig.pin,
                'torquein': sig.torquein,
                'ipdirect': sig.ipdirect,
                'iptarget': sig.iptarget,
                'iperr': sig.iperr,
                'etemp_profile': sig.etemp_profile,
                'edens_profile': sig.edens_profile,
            }
        # d3d data but with fully defined signals
        elif params['paths']['data'] == 'd3d_data_fully_defined':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = sig.fully_defined_signals
        # d3d data but with fully defined signals
        elif params['paths']['data'] == 'd3d_data_fully_defined_0D':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = sig.fully_defined_signals_0D
        elif params['paths']['data'] == 'd3d_data_temp_profile':
            # d3d data, electron temperature profile signal only
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'etemp_profile': sig.etemp_profile
            }
        elif params['paths']['data'] == 'd3d_data_dens_profile':
            # d3d data, electron density profile signal only
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'edens_profile': sig.edens_profile
            }

        # cross-machine
        elif params['paths']['data'] == 'jet_to_d3d_data':
            params['paths']['shot_files'] = [jet_full]
            params['paths']['shot_files_test'] = [d3d_full]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals
        elif params['paths']['data'] == 'd3d_to_jet_data':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals
        elif params['paths']['data'] == 'd3d_to_late_jet_data':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = [jet_iterlike_wall_late]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals
        elif params['paths']['data'] == 'jet_to_d3d_data_0D':
            params['paths']['shot_files'] = [jet_full]
            params['paths']['shot_files_test'] = [d3d_full]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals_0D
        elif params['paths']['data'] == 'd3d_to_jet_data_0D':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals_0D
        elif params['paths']['data'] == 'jet_to_d3d_data_1D':
            params['paths']['shot_files'] = [jet_full]
            params['paths']['shot_files_test'] = [d3d_full]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals_1D
        elif params['paths']['data'] == 'd3d_to_jet_data_1D':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals_1D

        else:
            g.print_unique("Unknown dataset {}".format(
                params['paths']['data']))
            exit(1)

        if len(params['paths']['specific_signals']):
            for s in params['paths']['specific_signals']:
                if s not in params['paths']['use_signals_dict'].keys():
                    g.print_unique(
                        "Signal {} is not fully defined for {} machine. "
                        "Skipping...".format(
                            s, params['paths']['data'].split("_")[0]))
            params['paths']['specific_signals'] = list(
                filter(
                    lambda x: x in params['paths']['use_signals_dict'].keys(),
                    params['paths']['specific_signals']))
            selected_signals = {
                k: params['paths']['use_signals_dict'][k]
                for k in params['paths']['specific_signals']
            }
            params['paths']['use_signals'] = sort_by_channels(
                list(selected_signals.values()))
        else:
            # default case
            params['paths']['use_signals'] = sort_by_channels(
                list(params['paths']['use_signals_dict'].values()))

        params['paths']['all_signals'] = sort_by_channels(
            list(params['paths']['all_signals_dict'].values()))

        g.print_unique(
            "Selected signals (determines which signals are used" +
            " for training):\n{}".format(params['paths']['use_signals']))
        params['paths']['shot_files_all'] = (
            params['paths']['shot_files'] + params['paths']['shot_files_test'])
        params['paths']['all_machines'] = list(
            set([file.machine for file in params['paths']['shot_files_all']]))

        # type assertions
        assert (isinstance(params['data']['signal_to_augment'], str)
                or params['data']['signal_to_augment'] is None)
        assert isinstance(params['data']['augment_during_training'], bool)

    return params
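
The normalizer and shot-list cache paths above are keyed by myhash_signals over the active signal set, so runs with different signal groups never share cached files. A toy illustration of the idea only (hash_signal_group with hashlib stands in for the project-specific myhash_signals):

import hashlib

def hash_signal_group(signal_names):
    # order-independent fingerprint of a signal set (illustrative only)
    joined = ','.join(sorted(signal_names))
    return int(hashlib.md5(joined.encode()).hexdigest()[:8], 16)

h = hash_signal_group(['q95', 'li', 'ip'])
print('normalization/normalization_signal_group_{}.npz'.format(h))
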
Example #7
def mpi_train(conf,
              shot_list_train,
              shot_list_validate,
              loader,
              callbacks_list=None,
              shot_list_test=None):
    loader.set_inference_mode(False)

    # TODO(KGF): this is not defined in conf.yaml, but added to processed dict
    # for the first time here:
    conf['num_workers'] = g.comm.Get_size()

    specific_builder = builder.ModelBuilder(conf)
    if g.tf_ver >= parse_version('1.14.0'):
        # Internal TensorFlow flags, subject to change (v1.14.0+ only?)
        try:
            from tensorflow.python.util import module_wrapper as depr
        except ImportError:
            from tensorflow.python.util import deprecation_wrapper as depr
        # depr._PRINT_DEPRECATION_WARNINGS = False  # does nothing
        depr._PER_MODULE_WARNING_LIMIT = 0
        # Suppresses warnings from "keras/backend/tensorflow_backend.py"
        # except: "Rate should be set to `rate = 1 - keep_prob`"
        # Also suppresses warnings from "keras/optimizers.py";
        # does NOT suppress warnings from "tensorflow/python/ops/math_grad.py"
    else:
        # TODO(KGF): next line suppresses ALL info and warning messages,
        # not just deprecation warnings...
        tf.logging.set_verbosity(tf.logging.ERROR)
    # TODO(KGF): for TF>v1.13.0 (esp v1.14.0), this next line prompts a ton of
    # deprecation warnings with externally-packaged Keras, e.g.:
    # WARNING:tensorflow:From  .../keras/backend/tensorflow_backend.py:174:
    # The name tf.get_default_session is deprecated.
    # Please use tf.compat.v1.get_default_session instead.
    train_model = specific_builder.build_model(False)
    # Cannot fix these Keras internals via "import tensorflow.compat.v1 as tf"
    #
    # TODO(KGF): note, these are different than C-based info diagnostics e.g.:
    # 2019-11-06 18:27:31.698908: I ...  dynamic library libcublas.so.10
    # which are NOT suppressed by set_verbosity. See top level __init__.py

    # load the latest epoch we did. Returns 0 if none exist yet
    e = specific_builder.load_model_weights(train_model)
    e_old = e

    num_epochs = conf['training']['num_epochs']
    lr_decay = conf['model']['lr_decay']
    batch_size = conf['training']['batch_size']
    lr = conf['model']['lr']
    clipnorm = conf['model']['clipnorm']
    warmup_steps = conf['model']['warmup_steps']
    # TODO(KGF): rename as "num_iter_minimum" or "min_steps_per_epoch"
    num_batches_minimum = conf['training']['num_batches_minimum']

    if 'adam' in conf['model']['optimizer']:
        optimizer = MPIAdam(lr=lr)
    elif (conf['model']['optimizer'] == 'sgd'
          or conf['model']['optimizer'] == 'tf_sgd'):
        optimizer = MPISGD(lr=lr)
    elif 'momentum_sgd' in conf['model']['optimizer']:
        optimizer = MPIMomentumSGD(lr=lr)
    else:
        print("Optimizer not implemented yet")
        exit(1)

    g.print_unique('{} epoch(s) left to go'.format(num_epochs - e))

    batch_generator = partial(loader.training_batch_generator_partial_reset,
                              shot_list=shot_list_train)

    g.print_unique("warmup steps = {}".format(warmup_steps))
    mpi_model = MPIModel(train_model,
                         optimizer,
                         g.comm,
                         batch_generator,
                         batch_size,
                         lr=lr,
                         warmup_steps=warmup_steps,
                         num_batches_minimum=num_batches_minimum,
                         conf=conf)
    mpi_model.compile(conf['model']['optimizer'], clipnorm,
                      conf['data']['target'].loss)
    tensorboard = None
    if g.task_index == 0:
        tensorboard_save_path = conf['paths']['tensorboard_save_path']
        write_grads = conf['callbacks']['write_grads']
        tensorboard = TensorBoard(log_dir=tensorboard_save_path,
                                  histogram_freq=1,
                                  write_graph=True,
                                  write_grads=write_grads)
        tensorboard.set_model(mpi_model.model)
        # TODO(KGF): check addition of TF model summary write added from fork
        fr = open('model_architecture.log', 'a')
        ori = sys.stdout
        sys.stdout = fr
        mpi_model.model.summary()
        sys.stdout = ori
        fr.close()
        mpi_model.model.summary()

    if g.task_index == 0:
        callbacks = mpi_model.build_callbacks(conf, callbacks_list)
        callbacks.set_model(mpi_model.model)
        callback_metrics = conf['callbacks']['metrics']
        callbacks.set_params({
            'epochs': num_epochs,
            'metrics': callback_metrics,
            'batch_size': batch_size,
        })
        callbacks.on_train_begin()
    if conf['callbacks']['mode'] == 'max':
        best_so_far = -np.inf
        cmp_fn = max
    else:
        best_so_far = np.inf
        cmp_fn = min

    while e < num_epochs:
        g.write_unique('\nBegin training from epoch {:.2f}/{}'.format(
            e, num_epochs))
        if g.task_index == 0:
            callbacks.on_epoch_begin(int(round(e)))
        mpi_model.set_lr(lr * lr_decay**e)

        # KGF: core work of loop performed in next line
        (step, ave_loss, curr_loss, num_so_far,
         effective_epochs) = mpi_model.train_epoch()
        e = e_old + effective_epochs
        g.write_unique('Finished training of epoch {:.2f}/{}\n'.format(
            e, num_epochs))

        # TODO(KGF): add diagnostic about "saving to epoch X"?
        loader.verbose = False  # True during the first iteration
        if g.task_index == 0:
            specific_builder.save_model_weights(train_model, int(round(e)))

        if conf['training']['no_validation']:
            break

        epoch_logs = {}
        g.write_unique('Begin evaluation of epoch {:.2f}/{}\n'.format(
            e, num_epochs))
        # TODO(KGF): flush output/ MPI barrier?
        # g.flush_all_inorder()

        # TODO(KGF): is there a way to avoid Keras.Models.load_weights()
        # repeated calls throughout mpi_make_pred*() fn calls?
        _, _, _, roc_area, loss = mpi_make_predictions_and_evaluate(
            conf, shot_list_validate, loader)

        if conf['training']['ranking_difficulty_fac'] != 1.0:
            (_, _, _, roc_area_train,
             loss_train) = mpi_make_predictions_and_evaluate(
                 conf, shot_list_train, loader)
            batch_generator = partial(
                loader.training_batch_generator_partial_reset,
                shot_list=shot_list_train)
            mpi_model.batch_iterator = batch_generator
            mpi_model.batch_iterator_func.__exit__()
            mpi_model.num_so_far_accum = mpi_model.num_so_far_indiv
            mpi_model.set_batch_iterator_func()

        if ('monitor_test' in conf['callbacks'].keys()
                and conf['callbacks']['monitor_test']):
            times = conf['callbacks']['monitor_times']
            areas, _ = mpi_make_predictions_and_evaluate_multiple_times(
                conf, shot_list_validate, loader, times)
            epoch_str = 'epoch {}, '.format(int(round(e)))
            g.write_unique(epoch_str + ' '.join([
                'val_roc_{} = {}'.format(t, roc)
                for t, roc in zip(times, areas)
            ]) + '\n')
            if shot_list_test is not None:
                areas, _ = mpi_make_predictions_and_evaluate_multiple_times(
                    conf, shot_list_test, loader, times)
                g.write_unique(epoch_str + ' '.join([
                    'test_roc_{} = {}'.format(t, roc)
                    for t, roc in zip(times, areas)
                ]) + '\n')

        epoch_logs['val_roc'] = roc_area
        epoch_logs['val_loss'] = loss
        epoch_logs['train_loss'] = ave_loss
        best_so_far = cmp_fn(epoch_logs[conf['callbacks']['monitor']],
                             best_so_far)
        stop_training = False
        g.flush_all_inorder()
        if g.task_index == 0:
            print('=========Summary======== for epoch {:.2f}'.format(e))
            print('Training Loss numpy: {:.3e}'.format(ave_loss))
            print('Validation Loss: {:.3e}'.format(loss))
            print('Validation ROC: {:.4f}'.format(roc_area))
            if conf['training']['ranking_difficulty_fac'] != 1.0:
                print('Training Loss: {:.3e}'.format(loss_train))
                print('Training ROC: {:.4f}'.format(roc_area_train))
            print('======================== ')
            callbacks.on_epoch_end(int(round(e)), epoch_logs)
            if hasattr(mpi_model.model, 'stop_training'):
                stop_training = mpi_model.model.stop_training
            # only save model weights if quantity we are tracking is improving
            if best_so_far != epoch_logs[conf['callbacks']['monitor']]:
                if ('monitor_test' in conf['callbacks'].keys()
                        and conf['callbacks']['monitor_test']):
                    print("No improvement, saving model weights anyways")
                else:
                    print("Not saving model weights")
                    specific_builder.delete_model_weights(
                        train_model, int(round(e)))

            # tensorboard
            val_generator = partial(loader.training_batch_generator,
                                    shot_list=shot_list_validate)()
            val_steps = 1
            tensorboard.on_epoch_end(val_generator, val_steps, int(round(e)),
                                     epoch_logs)
        stop_training = g.comm.bcast(stop_training, root=0)
        g.write_unique('Finished evaluation of epoch {:.2f}/{}'.format(
            e, num_epochs))
        # TODO(KGF): compare to old diagnostic:
        # g.write_unique("end epoch {}".format(e_old))
        if stop_training:
            g.write_unique("Stopping training due to early stopping")
            break

    if g.task_index == 0:
        callbacks.on_train_end()
        tensorboard.on_train_end()

    mpi_model.close()
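
The per-epoch schedule set via mpi_model.set_lr(lr * lr_decay**e) above is plain exponential decay in the (possibly fractional) epoch count e; a tiny illustration with made-up values:

lr, lr_decay = 2e-4, 0.97
for e in [0, 0.5, 1, 2, 4]:
    # fractional epochs are fine: decay is continuous in e
    print('epoch {:.1f}: lr = {:.6e}'.format(e, lr * lr_decay ** e))
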
Example #8
    def train_epoch(self):
        '''
        Perform distributed mini-batch SGD for one epoch. It takes the batch
        iterator function and a NN model from the MPIModel object, and fetches
        mini-batches in a while-loop until the number of samples seen by the
        ensemble of workers (num_so_far) exceeds the training dataset size
        (num_total).

        NOTE: "sample" = "an entire shot" within this description

        During each iteration, the gradient updates (deltas) and the loss are
        calculated for each model replica in the ensemble, weights are averaged
        over ensemble, and the new weights are set.

        It performs calls to: MPIModel.get_deltas, MPIModel.set_new_weights
        methods

        Argument list: Empty

        Returns:
          - step: final iteration number
          - ave_loss: model loss averaged over iterations within this epoch
          - curr_loss: training loss averaged over replicas at the final
        iteration
          - num_so_far: the cumulative number of samples seen by the ensemble
        of replicas up to the end of the final iteration (step) of this epoch
          - effective_epochs: fractional number of epochs completed so far

        Intermediate outputs and logging: debug printout of task_index (MPI),
        epoch number, number of samples seen to a current epoch, average
        training loss

        '''

        verbose = False
        first_run = True
        step = 0
        loss_averager = Averager()
        t_start = time.time()

        timeline_prof = False
        if (self.conf is not None
                and self.conf['training']['timeline_prof']):
            timeline_prof = True
        step_limit = 0
        if (self.conf is not None
                and self.conf['training']['step_limit'] > 0):
            step_limit = self.conf['training']['step_limit']

        batch_iterator_func = self.batch_iterator_func
        num_total = 1
        ave_loss = -1
        curr_loss = -1
        t0 = 0
        t1 = 0
        t2 = 0

        while ((self.num_so_far - self.epoch * num_total) < num_total
               or step < self.num_batches_minimum):
            if step_limit > 0 and step > step_limit:
                print('reached step limit')
                break
            try:
                (batch_xs, batch_ys, batches_to_reset, num_so_far_curr,
                 num_total, is_warmup_period) = next(batch_iterator_func)
            except StopIteration:
                g.print_unique("Resetting batch iterator.")
                self.num_so_far_accum = self.num_so_far_indiv
                self.set_batch_iterator_func()
                batch_iterator_func = self.batch_iterator_func
                (batch_xs, batch_ys, batches_to_reset, num_so_far_curr,
                 num_total, is_warmup_period) = next(batch_iterator_func)
            self.num_so_far_indiv = self.num_so_far_accum + num_so_far_curr

            # if batches_to_reset:
            # self.model.reset_states(batches_to_reset)

            warmup_phase = (step < self.warmup_steps and self.epoch == 0)
            num_replicas = 1 if warmup_phase else self.num_replicas

            self.num_so_far = self.mpi_sum_scalars(self.num_so_far_indiv,
                                                   num_replicas)

            # run the model once to force compilation. Don't actually use these
            # values.
            if first_run:
                first_run = False
                t0_comp = time.time()
                #   print('input_dimension:',batch_xs.shape)
                #   print('output_dimension:',batch_ys.shape)
                _, _ = self.train_on_batch_and_get_deltas(
                    batch_xs, batch_ys, verbose)
                self.comm.Barrier()
                sys.stdout.flush()
                # TODO(KGF): check line feed/carriage returns around this
                g.print_unique(
                    '\nCompilation finished in {:.2f}s'.format(time.time() -
                                                               t0_comp))
                t_start = time.time()
                sys.stdout.flush()

            if np.any(batches_to_reset):
                reset_states(self.model, batches_to_reset)
            if ('noise' in self.conf['training'].keys()
                    and self.conf['training']['noise'] is not False):
                batch_xs = self.add_noise(batch_xs)
            t0 = time.time()
            deltas, loss = self.train_on_batch_and_get_deltas(
                batch_xs, batch_ys, verbose)
            t1 = time.time()
            if not is_warmup_period:
                self.set_new_weights(deltas, num_replicas)
                t2 = time.time()
                write_str_0 = self.calculate_speed(t0, t1, t2, num_replicas)
                curr_loss = self.mpi_average_scalars(1.0 * loss, num_replicas)
                # g.print_unique(self.model.get_weights()[0][0][:4])
                loss_averager.add_val(curr_loss)
                ave_loss = loss_averager.get_ave()
                eta = self.estimate_remaining_time(
                    t0 - t_start, self.num_so_far - self.epoch * num_total,
                    num_total)
                write_str = (
                    '\r[{}] step: {} [ETA: {:.2f}s] [{:.2f}/{}], '.format(
                        self.task_index, step, eta, 1.0 * self.num_so_far,
                        num_total) +
                    'loss: {:.5f} [{:.5f}] | '.format(ave_loss, curr_loss) +
                    'walltime: {:.4f} | '.format(time.time() -
                                                 self.start_time))
                g.write_unique(write_str + write_str_0)

                if timeline_prof:
                    # dump profile
                    tl = timeline.Timeline(self.run_metadata.step_stats)
                    ctf = tl.generate_chrome_trace_format()
                    # dump file per iteration
                    with open('timeline_%s.json' % step, 'w') as f:
                        f.write(ctf)

                step += 1
            else:
                g.write_unique('\r[{}] warmup phase, num so far: {}'.format(
                    self.task_index, self.num_so_far))

        effective_epochs = 1.0 * self.num_so_far / num_total
        epoch_previous = self.epoch
        self.epoch = effective_epochs
        g.write_unique(
            # TODO(KGF): "a total of X epochs within this session" ?
            '\nFinished training epoch {:.2f} '.format(self.epoch)
            # TODO(KGF): "precisely/exactly X epochs just passed"?
            +
            'during this session ({:.2f} epochs passed)'.format(self.epoch -
                                                                epoch_previous)
            # '\nEpoch {:.2f} finished training ({:.2f} epochs passed)'.format(
            #     1.0 * self.epoch, self.epoch - epoch_previous)
            + ' in {:.2f} seconds\n'.format(t2 - t_start))
        return (step, ave_loss, curr_loss, self.num_so_far, effective_epochs)
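
loss_averager above is only used through add_val/get_ave; a minimal Averager with that interface (the real class lives elsewhere in the package, so treat this as an assumed shape, not the project's implementation):

class Averager:
    '''Running mean with the add_val/get_ave interface used above.'''

    def __init__(self):
        self.total = 0.0
        self.n = 0

    def add_val(self, val):
        # accumulate one loss sample
        self.total += val
        self.n += 1

    def get_ave(self):
        # mean of all samples so far (0.0 if none added yet)
        return self.total / max(1, self.n)
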
Example #9
def fetch_d3d_data(signal_path, shot, c=None):
    tree, signal = get_tree_and_tag_no_backslash(signal_path)
    if tree is None:
        signal = c.get('findsig("' + signal + '",_fstree)').value
        tree = c.get('_fstree').value
    # if c is None:
    # c = MDSplus.Connection('atlas.gat.com')

    # Retrieve data
    found = False
    xdata = np.array([0])
    ydata = None
    data = np.array([0])

    # Retrieve data from MDSplus (thin)
    # first try: retrieve directly from tree and signal
    def get_units(expr):
        # avoid shadowing the builtin str; expr is a TDI expression string
        units = c.get('units_of(' + expr + ')').data()
        if units == '' or units == ' ':
            units = c.get('units(' + expr + ')').data()
        return units

    try:
        c.openTree(tree, shot)
        data = c.get('_s = ' + signal).data()
        # data_units = c.get('units_of(_s)').data()
        rank = np.ndim(data)
        found = True

    except Exception as e:
        g.print_unique(e)
        sys.stdout.flush()

    # Retrieve data from PTDATA if node not found
    if not found:
        # g.print_unique("not in full path {}".format(signal))
        data = c.get('_s = ptdata2("' + signal + '",' + str(shot) + ')').data()
        if len(data) != 1:
            rank = np.ndim(data)
            found = True
    # Retrieve data from Pseudo-pointname if not in ptdata
    if not found:
        # g.print_unique("not in PTDATA {}".format(signal))
        data = c.get('_s = pseudo("' + signal + '",' + str(shot) + ')').data()
        if len(data) != 1:
            rank = np.ndim(data)
            found = True
    # this means the signal wasn't found
    if not found:
        g.print_unique("No such signal: {}".format(signal))

    # get time base
    if found:
        if rank > 1:
            xdata = c.get('dim_of(_s,1)').data()
            ydata = c.get('dim_of(_s)').data()
        else:
            xdata = c.get('dim_of(_s)').data()

    # MDSplus seems to return 2-D arrays transposed.  Change them back.
    if np.ndim(data) == 2:
        data = np.transpose(data)
    if np.ndim(ydata) == 2:
        ydata = np.transpose(ydata)
    if np.ndim(xdata) == 2:
        xdata = np.transpose(xdata)

    # print('   GADATA Retrieval Time : ', time.time() - t0)
    xdata = xdata * 1e-3  # time is measured in ms; convert to s
    return xdata, data, ydata, found
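
A hedged usage sketch: the host matches the commented-out hint above ('atlas.gat.com'), but the point name and shot number are placeholders, not values from this project:

import MDSplus

c = MDSplus.Connection('atlas.gat.com')
# a bare point name carries no tree, so resolution falls back to findsig,
# then PTDATA, then pseudo-pointnames, as in the function body above
xdata, data, ydata, found = fetch_d3d_data('ip', 160000, c)
if found:
    print(xdata[0], xdata[-1])  # time base already converted to seconds
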
Example #10
    'pradedget': pradedget,
    'pint': pint,
    'torqueint': torqueint,
    'ipdirectt': ipdirectt,
    'iptargett': iptargett,
    'iperrt': iperrt,
    'etemp_profilet': etemp_profilet,
    'edens_profilet': edens_profilet,
}

# for actual data analysis, use:
# all_signals_restricted = [q95, li, ip, energy, lm, dens, pradcore, pradtot,
# pin, etemp_profile, edens_profile]
all_signals_restricted = all_signals

g.print_unique('All signals (determines which signals are downloaded'
               ' & preprocessed):')
g.print_unique(all_signals.values())

fully_defined_signals = {
    sig_name: sig
    for (sig_name, sig) in all_signals_restricted.items()
    if (sig.is_defined_on_machines(all_machines))
}
fully_defined_signals_0D = {
    sig_name: sig
    for (sig_name, sig) in all_signals_restricted.items()
    if (sig.is_defined_on_machines(all_machines) and sig.num_channels == 1)
}
fully_defined_signals_1D = {
    sig_name: sig
    for (sig_name, sig) in all_signals_restricted.items()
    if (sig.is_defined_on_machines(all_machines) and sig.num_channels > 1)
}
Example #11
    def train_epoch(self):
        '''
        The purpose of the method is to perform distributed mini-batch SGD for
        one epoch. It takes the batch iterator function and a NN model from
        the MPIModel object, and fetches mini-batches in a while-loop until
        the number of samples seen by the ensemble of workers (num_so_far)
        exceeds the training dataset size (num_total).

        During each iteration, the gradient updates (deltas) and the loss are
        calculated for each model replica in the ensemble, weights are averaged
        over ensemble, and the new weights are set.

        It performs calls to: MPIModel.get_deltas, MPIModel.set_new_weights
        methods

        Argument list: Empty

        Returns:
          - step: final iteration number
          - ave_loss: training loss averaged over iterations within this epoch
          - curr_loss: training loss averaged over replicas at the final
        iteration
          - num_so_far: the cumulative number of samples seen by the ensemble
        of replicas up to the end of this epoch
          - effective_epochs: fractional number of epochs completed so far

        Intermediate outputs and logging: debug printout of task_index (MPI),
        epoch number, number of samples seen to a current epoch, average
        training loss

        '''

        verbose = False
        first_run = True
        step = 0
        loss_averager = Averager()
        t_start = time.time()

        batch_iterator_func = self.batch_iterator_func
        num_total = 1
        ave_loss = -1
        curr_loss = -1
        t0 = 0
        t1 = 0
        t2 = 0

        while ((self.num_so_far - self.epoch * num_total) < num_total
               or step < self.num_batches_minimum):
            try:
                (batch_xs, batch_ys, batches_to_reset, num_so_far_curr,
                 num_total, is_warmup_period) = next(batch_iterator_func)
            except StopIteration:
                g.print_unique("Resetting batch iterator.")
                self.num_so_far_accum = self.num_so_far_indiv
                self.set_batch_iterator_func()
                batch_iterator_func = self.batch_iterator_func
                (batch_xs, batch_ys, batches_to_reset, num_so_far_curr,
                 num_total, is_warmup_period) = next(batch_iterator_func)
            self.num_so_far_indiv = self.num_so_far_accum + num_so_far_curr

            # if batches_to_reset:
            # self.model.reset_states(batches_to_reset)

            warmup_phase = (step < self.warmup_steps and self.epoch == 0)
            num_replicas = 1 if warmup_phase else self.num_replicas

            self.num_so_far = self.mpi_sum_scalars(self.num_so_far_indiv,
                                                   num_replicas)

            # run the model once to force compilation. Don't actually use these
            # values.
            if first_run:
                first_run = False
                t0_comp = time.time()
                _, _ = self.train_on_batch_and_get_deltas(
                    batch_xs, batch_ys, verbose)
                self.comm.Barrier()
                sys.stdout.flush()
                # TODO(KGF): check line feed/carriage returns around this
                g.print_unique(
                    '\nCompilation finished in {:.2f}s'.format(time.time() -
                                                               t0_comp))
                t_start = time.time()
                sys.stdout.flush()

            if np.any(batches_to_reset):
                reset_states(self.model, batches_to_reset)

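            # t0..t1 times the gradient computation on this batch;
            # t1..t2 times the ensemble weight update (reported via
            # calculate_speed below).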
            t0 = time.time()
            deltas, loss = self.train_on_batch_and_get_deltas(
                batch_xs, batch_ys, verbose)
            t1 = time.time()
            if not is_warmup_period:
                self.set_new_weights(deltas, num_replicas)
                t2 = time.time()
                write_str_0 = self.calculate_speed(t0, t1, t2, num_replicas)
                curr_loss = self.mpi_average_scalars(1.0 * loss, num_replicas)
                # g.print_unique(self.model.get_weights()[0][0][:4])
                loss_averager.add_val(curr_loss)
                ave_loss = loss_averager.get_val()
                eta = self.estimate_remaining_time(
                    t0 - t_start, self.num_so_far - self.epoch * num_total,
                    num_total)
                write_str = (
                    '\r[{}] step: {} [ETA: {:.2f}s] [{:.2f}/{}], '.format(
                        self.task_index, step, eta, 1.0 * self.num_so_far,
                        num_total) +
                    'loss: {:.5f} [{:.5f}] | '.format(ave_loss, curr_loss) +
                    'walltime: {:.4f} | '.format(time.time() -
                                                 self.start_time))
                g.write_unique(write_str + write_str_0)
                step += 1
            else:
                g.write_unique('\r[{}] warmup phase, num so far: {}'.format(
                    self.task_index, self.num_so_far))

        effective_epochs = 1.0 * self.num_so_far / num_total
        epoch_previous = self.epoch
        self.epoch = effective_epochs
        g.write_unique(
            # TODO(KGF): "a total of X epochs within this session" ?
            '\nFinished training epoch {:.2f} '.format(self.epoch)
            # TODO(KGF): "precisely/exactly X epochs just passed"?
            +
            'during this session ({:.2f} epochs passed)'.format(self.epoch -
                                                                epoch_previous)
            # '\nEpoch {:.2f} finished training ({:.2f} epochs passed)'.format(
            #     1.0 * self.epoch, self.epoch - epoch_previous)
            + ' in {:.2f} seconds\n'.format(t2 - t_start))
        return (step, ave_loss, curr_loss, self.num_so_far, effective_epochs)
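
The docstring above describes averaging the gradient deltas over the ensemble before setting new weights. A minimal sketch of that step, assuming mpi4py; the names here are hypothetical stand-ins for MPIModel's internals, not the project's actual implementation:

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD


def average_deltas(local_deltas, num_replicas):
    """Sum each per-layer delta across all workers, then average."""
    averaged = []
    for delta in local_deltas:
        summed = np.empty_like(delta)
        comm.Allreduce(delta, summed, op=MPI.SUM)  # elementwise sum over ranks
        averaged.append(summed / num_replicas)
    return averaged

# Hypothetical usage inside a loop like the one above:
# new_weights = [w + d for (w, d) in
#                zip(weights, average_deltas(deltas, comm.Get_size()))]
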
Example No. 13
import getpass
import os

import yaml

import plasma.global_vars as g

# The remaining names used below (sig, ShotListFiles, myhash_signals,
# sort_by_channels) come from the project's signal and shot-list modules;
# their exact import paths are not shown in this excerpt.


def parameters(input_file):
    """Parse yaml file of configuration parameters."""
    # TODO(KGF): the following line imports TensorFlow as a Keras backend
    # by default (absent env variable KERAS_BACKEND and/or config file
    # $HOME/.keras/keras.json) "from plasma.conf import conf"
    # via "import keras.backend as K" in targets.py
    from plasma.models.targets import (HingeTarget, MaxHingeTarget,
                                       BinaryTarget, TTDTarget, TTDInvTarget,
                                       TTDLinearTarget)
    with open(input_file, 'r') as yaml_file:
        params = yaml.load(yaml_file, Loader=yaml.SafeLoader)
        params['user_name'] = getpass.getuser()
        base_path = params['fs_path']
        output_path = os.path.join(base_path, params['user_name'])
        # TODO(KGF): this next line should be deleted at some pt, breaking BC
        base_path = output_path
        print(output_path)
        # TODO(KGF): allow for completely independent save/output_path vs.
        # base_path configured in conf.yaml; don't assume username
        # subdirectory or pwd
        # save_path = os.environ.get("PWD")

        params['paths']['base_path'] = base_path
        params['paths']['output_path'] = output_path
        if isinstance(params['paths']['signal_prepath'], list):
            g.print_unique('Reading from multiple data folders!')
            params['paths']['signal_prepath'] = [
                base_path + s for s in params['paths']['signal_prepath']
            ]
        else:
            params['paths']['signal_prepath'] = (
                base_path + params['paths']['signal_prepath'])
        params['paths']['shot_list_dir'] = (base_path +
                                            params['paths']['shot_list_dir'])
        # See notes in data/signals.py for details on signal tols relative to
        # t_disrupt. The following 2x dataset definitions permit progressively
        # worse signal quality when preprocessing the shots and omitting some
        if params['paths']['data'] == 'd3d_data_max_tol':
            # let signals terminate up to 29 ms before t_disrupt on D3D
            h = myhash_signals(sig.all_signals_max_tol.values())
        elif params['paths']['data'] == 'd3d_data_garbage':
            # let up to 3x signals disappear at any time before t_disrupt
            # (and NaNs?)
            # -----
            # temp workaround for identical signal dictionary (but different
            # omit criteria in shots.py Shot.get_signals_and_times_from_file())
            # ---> 2x hash int
            # TODO(KGF): not robust; create reproducible specification and
            # recording of signal filtering procedure
            h = myhash_signals(sig.all_signals_max_tol.values()) * 2
        else:
            h = myhash_signals(sig.all_signals.values())

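        # The hash h keys every cached artifact below (normalization files,
        # shot lists, processed shots), so changing the signal group or its
        # tolerance criteria automatically invalidates stale caches.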
        params['paths']['global_normalizer_path'] = (
            output_path +
            '/normalization/normalization_signal_group_{}.npz'.format(h))
        if params['training']['hyperparam_tuning']:
            # params['paths']['saved_shotlist_path'] =
            # './normalization/shot_lists.npz'
            params['paths']['normalizer_path'] = (
                './normalization/normalization_signal_group_{}.npz'.format(h))
            params['paths']['model_save_path'] = './model_checkpoints/'
            params['paths']['csvlog_save_path'] = './csv_logs/'
            params['paths']['results_prepath'] = './results/'
        else:
            # params['paths']['saved_shotlist_path'] = output_path +
            # '/normalization/shot_lists.npz'
            params['paths']['normalizer_path'] = (
                params['paths']['global_normalizer_path'])
            params['paths']['model_save_path'] = (output_path +
                                                  '/model_checkpoints/')
            params['paths']['csvlog_save_path'] = output_path + '/csv_logs/'
            params['paths']['results_prepath'] = output_path + '/results/'
        params['paths']['tensorboard_save_path'] = (
            output_path + params['paths']['tensorboard_save_path'])
        params['paths']['saved_shotlist_path'] = (
            params['paths']['base_path'] + '/processed_shotlists/' +
            params['paths']['data'] +
            '/shot_lists_signal_group_{}.npz'.format(h))
        params['paths']['processed_prepath'] = (output_path +
                                                '/processed_shots/' +
                                                'signal_group_{}/'.format(h))
        # ensure the shallow model has a +1/-1 target
        if params['model']['shallow'] or params['target'] == 'hinge':
            params['data']['target'] = HingeTarget
        elif params['target'] == 'maxhinge':
            MaxHingeTarget.fac = params['data']['positive_example_penalty']
            params['data']['target'] = MaxHingeTarget
        elif params['target'] == 'binary':
            params['data']['target'] = BinaryTarget
        elif params['target'] == 'ttd':
            params['data']['target'] = TTDTarget
        elif params['target'] == 'ttdinv':
            params['data']['target'] = TTDInvTarget
        elif params['target'] == 'ttdlinear':
            params['data']['target'] = TTDLinearTarget
        else:
            # TODO(KGF): "Target" base class is unused here
            g.print_unique('Unknown type of target. Exiting')
            exit(1)

        # params['model']['output_activation'] =
        # params['data']['target'].activation
        # binary crossentropy performs slightly better?
        # params['model']['loss'] = params['data']['target'].loss

        # signals
        if params['paths']['data'] in ['d3d_data_max_tol', 'd3d_data_garbage']:
            params['paths']['all_signals_dict'] = sig.all_signals_max_tol
        else:
            params['paths']['all_signals_dict'] = sig.all_signals

        # assert order
        # q95, li, ip, lm, betan, energy, dens, pradcore, pradedge, pin,
        # pechin, torquein, ipdirect, etemp_profile, edens_profile

        # shot lists
        jet_carbon_wall = ShotListFiles(sig.jet,
                                        params['paths']['shot_list_dir'],
                                        ['CWall_clear.txt', 'CFC_unint.txt'],
                                        'jet carbon wall data')
        jet_iterlike_wall = ShotListFiles(
            sig.jet, params['paths']['shot_list_dir'],
            ['ILW_unint.txt', 'BeWall_clear.txt'], 'jet iter like wall data')
        jet_iterlike_wall_late = ShotListFiles(
            sig.jet, params['paths']['shot_list_dir'],
            ['ILW_unint_late.txt', 'ILW_clear_late.txt'],
            'Late jet iter like wall data')
        # jet_iterlike_wall_full = ShotListFiles(
        #     sig.jet, params['paths']['shot_list_dir'],
        #     ['ILW_unint_full.txt', 'ILW_clear_full.txt'],
        #     'Full jet iter like wall data')

        jenkins_jet_carbon_wall = ShotListFiles(
            sig.jet, params['paths']['shot_list_dir'],
            ['jenkins_CWall_clear.txt', 'jenkins_CFC_unint.txt'],
            'Subset of jet carbon wall data for Jenkins tests')
        jenkins_jet_iterlike_wall = ShotListFiles(
            sig.jet, params['paths']['shot_list_dir'],
            ['jenkins_ILW_unint.txt', 'jenkins_BeWall_clear.txt'],
            'Subset of jet iter like wall data for Jenkins tests')

        jet_full = ShotListFiles(sig.jet, params['paths']['shot_list_dir'], [
            'ILW_unint.txt', 'BeWall_clear.txt', 'CWall_clear.txt',
            'CFC_unint.txt'
        ], 'jet full data')

        # d3d_10000 = ShotListFiles(
        #     sig.d3d, params['paths']['shot_list_dir'],
        #     ['d3d_clear_10000.txt', 'd3d_disrupt_10000.txt'],
        #     'd3d data 10000 ND and D shots')
        # d3d_1000 = ShotListFiles(
        #     sig.d3d, params['paths']['shot_list_dir'],
        #     ['d3d_clear_1000.txt', 'd3d_disrupt_1000.txt'],
        #     'd3d data 1000 ND and D shots')
        # d3d_100 = ShotListFiles(
        #     sig.d3d, params['paths']['shot_list_dir'],
        #     ['d3d_clear_100.txt', 'd3d_disrupt_100.txt'],
        #     'd3d data 100 ND and D shots')
        d3d_full = ShotListFiles(
            sig.d3d, params['paths']['shot_list_dir'],
            ['d3d_clear_data_avail.txt', 'd3d_disrupt_data_avail.txt'],
            'd3d data since shot 125500')  # to 168555
        # superset of d3d_full added in 2019 from C. Rea:
        d3d_full_2019 = ShotListFiles(
            sig.d3d, params['paths']['shot_list_dir'],
            ['d3d_clear_since_2016.txt', 'd3d_disrupt_since_2016.txt'],
            'd3d data since 2016')  # to 180847
        d3d_jenkins = ShotListFiles(
            sig.d3d, params['paths']['shot_list_dir'],
            ['jenkins_d3d_clear.txt', 'jenkins_d3d_disrupt.txt'],
            'Subset of d3d data for Jenkins test')

        # TODO(KGF): currently unused shot list files in project directory
        # /tigress/FRNN/shot_lists/:
        # d3d_clear.txt : 40560, 168554
        # d3d_disrupt   : 100000, 168555

        # TODO(KGF): should /tigress/FRNN/shot_lists/ be organized into subdirs
        # like the original repo directory data/shot_lists/d3d/, jet/, nstx/ ?

        # d3d_jb_full = ShotListFiles(
        #     sig.d3d, params['paths']['shot_list_dir'],
        #     ['shotlist_JaysonBarr_clear.txt',
        #      'shotlist_JaysonBarr_disrupt.txt'],
        #     'd3d shots since 160000-170000')

        # nstx_full = ShotListFiles(
        #     nstx, params['paths']['shot_list_dir'],
        #     ['disrupt_nstx.txt'], 'nstx shots (all are disruptive)')
        # ==================
        # JET DATASETS
        # ==================
        if params['paths']['data'] == 'jet_all':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.jet_signals
        elif params['paths']['data'] == 'jet_0D':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.jet_signals_0D
        elif params['paths']['data'] == 'jet_1D':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.jet_signals_1D
        elif params['paths']['data'] == 'jet_late':
            params['paths']['shot_files'] = [jet_iterlike_wall_late]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = sig.jet_signals
        elif params['paths']['data'] == 'jet_carbon_to_late_0D':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall_late]
            params['paths']['use_signals_dict'] = sig.jet_signals_0D
        elif params['paths']['data'] == 'jet_temp_profile':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = {
                'etemp_profile': sig.etemp_profile
            }
        elif params['paths']['data'] == 'jet_dens_profile':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = {
                'edens_profile': sig.edens_profile
            }
        elif params['paths']['data'] == 'jet_carbon_all':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = sig.jet_signals
        elif params['paths']['data'] == 'jet_mixed_all':
            params['paths']['shot_files'] = [jet_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = sig.jet_signals
        elif params['paths']['data'] == 'jenkins_jet':
            params['paths']['shot_files'] = [jenkins_jet_carbon_wall]
            params['paths']['shot_files_test'] = [jenkins_jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.jet_signals
        # JET data but with fully defined signals
        elif params['paths']['data'] == 'jet_fully_defined':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals
        # JET data but with fully defined signals
        elif params['paths']['data'] == 'jet_fully_defined_0D':
            params['paths']['shot_files'] = [jet_carbon_wall]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals_0D
        # ==================
        # D3D DATASETS
        # ==================
        elif params['paths']['data'] == 'd3d_all':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'q95': sig.q95,
                'li': sig.li,
                'ip': sig.ip,
                'lm': sig.lm,
                'betan': sig.betan,
                'energy': sig.energy,
                'dens': sig.dens,
                'pradcore': sig.pradcore,
                'pradedge': sig.pradedge,
                'pin': sig.pin,
                'torquein': sig.torquein,
                'ipdirect': sig.ipdirect,
                'iptarget': sig.iptarget,
                'iperr': sig.iperr,
                'etemp_profile': sig.etemp_profile,
                'edens_profile': sig.edens_profile,
            }
        elif params['paths']['data'] in [
                'd3d_data_max_tol', 'd3d_data_garbage'
        ]:
            params['paths']['shot_files'] = [d3d_full_2019]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'q95t': sig.q95t,
                'lit': sig.lit,
                'ipt': sig.ipt,
                'lmt': sig.lmt,
                'betant': sig.betant,
                'energyt': sig.energyt,
                'denst': sig.denst,
                'pradcoret': sig.pradcoret,
                'pradedget': sig.pradedget,
                'pint': sig.pint,
                'torqueint': sig.torqueint,
                'ipdirectt': sig.ipdirectt,
                'iptargett': sig.iptargett,
                'iperrt': sig.iperrt,
                'etemp_profilet': sig.etemp_profilet,
                'edens_profilet': sig.edens_profilet,
            }
        elif params['paths']['data'] == 'd3d_2019':
            params['paths']['shot_files'] = [d3d_full_2019]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'q95': sig.q95,
                'li': sig.li,
                'ip': sig.ip,
                'lm': sig.lm,
                'betan': sig.betan,
                'energy': sig.energy,
                'dens': sig.dens,
                'pradcore': sig.pradcore,
                'pradedge': sig.pradedge,
                'pin': sig.pin,
                'torquein': sig.torquein,
                'ipdirect': sig.ipdirect,
                'iptarget': sig.iptarget,
                'iperr': sig.iperr,
                'etemp_profile': sig.etemp_profile,
                'edens_profile': sig.edens_profile,
            }
        elif params['paths']['data'] == 'd3d_1D':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'ipdirect': sig.ipdirect,
                'etemp_profile': sig.etemp_profile,
                'edens_profile': sig.edens_profile,
            }
        elif params['paths']['data'] == 'd3d_all_profiles':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'ipdirect': sig.ipdirect,
                'etemp_profile': sig.etemp_profile,
                'edens_profile': sig.edens_profile,
                'itemp_profile': sig.itemp_profile,
                'zdens_profile': sig.zdens_profile,
                'trot_profile': sig.trot_profile,
                'pthm_profile': sig.pthm_profile,
                'neut_profile': sig.neut_profile,
                'q_profile': sig.q_profile,
                'bootstrap_current_profile': sig.bootstrap_current_profile,
                'q_psi_profile': sig.q_psi_profile,
            }
        elif params['paths']['data'] == 'd3d_0D':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'q95': sig.q95,
                'li': sig.li,
                'ip': sig.ip,
                'lm': sig.lm,
                'betan': sig.betan,
                'energy': sig.energy,
                'dens': sig.dens,
                'pradcore': sig.pradcore,
                'pradedge': sig.pradedge,
                'pin': sig.pin,
                'torquein': sig.torquein,
                'ipdirect': sig.ipdirect,
                'iptarget': sig.iptarget,
                'iperr': sig.iperr,
            }
        # TODO(KGF): rename. Unlike JET, there are probably differences between
        # sig.d3d_signals and the manually-defined sigs in above d3d_all
        # elif params['paths']['data'] == 'd3d_all':
        #     params['paths']['shot_files'] = [d3d_full]
        #     params['paths']['shot_files_test'] = []
        #     params['paths']['use_signals_dict'] = sig.d3d_signals
        elif params['paths']['data'] == 'jenkins_d3d':
            params['paths']['shot_files'] = [d3d_jenkins]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'q95': sig.q95,
                'li': sig.li,
                'ip': sig.ip,
                'lm': sig.lm,
                'betan': sig.betan,
                'energy': sig.energy,
                'dens': sig.dens,
                'pradcore': sig.pradcore,
                'pradedge': sig.pradedge,
                'pin': sig.pin,
                'torquein': sig.torquein,
                'ipdirect': sig.ipdirect,
                'iptarget': sig.iptarget,
                'iperr': sig.iperr,
                'etemp_profile': sig.etemp_profile,
                'edens_profile': sig.edens_profile,
            }
        # D3D data but with fully defined signals
        elif params['paths']['data'] == 'd3d_fully_defined':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = sig.fully_defined_signals
        # D3D data but with fully defined signals
        elif params['paths']['data'] == 'd3d_fully_defined_0D':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = sig.fully_defined_signals_0D
        elif params['paths']['data'] == 'd3d_temp_profile':
            # D3D data, electron temperature profile only
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'etemp_profile': sig.etemp_profile
            }  # fully_defined_signals_0D
        elif params['paths']['data'] == 'd3d_dens_profile':
            # D3D data, electron density profile only
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = []
            params['paths']['use_signals_dict'] = {
                'edens_profile': sig.edens_profile
            }  # fully_defined_signals_0D
        # ======================
        # CROSS-MACHINE DATASETS
        # ======================
        elif params['paths']['data'] == 'jet_to_d3d_all':
            params['paths']['shot_files'] = [jet_full]
            params['paths']['shot_files_test'] = [d3d_full]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals
        elif params['paths']['data'] == 'd3d_to_jet_all':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals
        elif params['paths']['data'] == 'd3d_to_late_jet':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = [jet_iterlike_wall_late]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals
        elif params['paths']['data'] == 'jet_to_d3d_0D':
            params['paths']['shot_files'] = [jet_full]
            params['paths']['shot_files_test'] = [d3d_full]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals_0D
        elif params['paths']['data'] == 'd3d_to_jet_0D':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals_0D
        elif params['paths']['data'] == 'jet_to_d3d_1D':
            params['paths']['shot_files'] = [jet_full]
            params['paths']['shot_files_test'] = [d3d_full]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals_1D
        elif params['paths']['data'] == 'd3d_to_jet_1D':
            params['paths']['shot_files'] = [d3d_full]
            params['paths']['shot_files_test'] = [jet_iterlike_wall]
            params['paths']['use_signals_dict'] = sig.fully_defined_signals_1D

        else:
            g.print_unique("Unknown dataset {}".format(
                params['paths']['data']))
            exit(1)

        if len(params['paths']['specific_signals']):
            for s in params['paths']['specific_signals']:
                if s not in params['paths']['use_signals_dict'].keys():
                    g.print_unique(
                        "Signal {} is not fully defined for {} machine. "
                        "Skipping...".format(
                            s, params['paths']['data'].split("_")[0]))
            params['paths']['specific_signals'] = list(
                filter(
                    lambda x: x in params['paths']['use_signals_dict'].keys(),
                    params['paths']['specific_signals']))
            selected_signals = {
                k: params['paths']['use_signals_dict'][k]
                for k in params['paths']['specific_signals']
            }
            params['paths']['use_signals'] = sort_by_channels(
                list(selected_signals.values()))
        else:
            # default case
            params['paths']['use_signals'] = sort_by_channels(
                list(params['paths']['use_signals_dict'].values()))

        params['paths']['all_signals'] = sort_by_channels(
            list(params['paths']['all_signals_dict'].values()))

        g.print_unique(
            "Selected signals (determines which signals are used" +
            " for training):\n{}".format(params['paths']['use_signals']))
        params['paths']['shot_files_all'] = (
            params['paths']['shot_files'] + params['paths']['shot_files_test'])
        params['paths']['all_machines'] = list(
            set([file.machine for file in params['paths']['shot_files_all']]))

        # type assertions
        assert (isinstance(params['data']['signal_to_augment'], str)
                or params['data']['signal_to_augment'] is None)
        assert isinstance(params['data']['augment_during_training'], bool)

    return params
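
A hedged usage sketch of the parser; the YAML path is illustrative, and any conf.yaml containing the keys referenced above would do:

conf = parameters('examples/conf.yaml')  # hypothetical path
print(conf['paths']['data'])         # selected dataset name
print(conf['paths']['use_signals'])  # Signal objects sorted by channel count
print(conf['data']['target'])        # target class resolved from conf['target']
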
Example No. 14
    def print_summary(self, action='loaded'):
        g.print_unique(
            '{} normalization data from {} shots ( {} disruptive )'.format(
                action, self.num_processed, self.num_disruptive))
Example No. 15
from plasma.conf_parser import parameters
import os
import errno
import plasma.global_vars as g

# TODO(KGF): this conf.py feels like an unnecessary level of indirection
if g.conf_file is not None:
    g.print_unique(f"Loading configuration from {g.conf_file}")
    conf = parameters(g.conf_file)
elif os.path.exists(
        os.path.join(os.path.abspath(os.path.dirname(__file__)),
                     '../examples/conf.yaml')):
    conf = parameters(
        os.path.join(os.path.abspath(os.path.dirname(__file__)),
                     '../examples/conf.yaml'))
elif os.path.exists('./conf.yaml'):
    conf = parameters('./conf.yaml')
elif os.path.exists('./examples/conf.yaml'):
    conf = parameters('./examples/conf.yaml')
elif os.path.exists('../examples/conf.yaml'):
    conf = parameters('../examples/conf.yaml')
else:
    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                            'conf.yaml')
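
Since this resolution chain runs at import time, a custom configuration file can be selected by setting g.conf_file before the first import of plasma.conf. A minimal sketch; the path is illustrative:

import plasma.global_vars as g

g.conf_file = '/path/to/custom_conf.yaml'  # hypothetical path
from plasma.conf import conf  # import must come after the assignment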