Example 1
import os

import pandas as pd

# load_yaml, select_pair_layers, Event, utils_data, wrangler and analysis
# are helpers from the heptrkx package; their exact module paths are not
# shown in this snippet.


def track_eff_of_edge_selected(evtid,
                               config_name,
                               matching_cut=0.8,
                               remove_duplicated_hits=False):
    # NOTE: remove_duplicated_hits is accepted but not used below.
    config = load_yaml(config_name)
    evt_dir = config['track_ml']['dir']
    layers = config['doublets_from_cuts']['layers']
    sel_layer_id = select_pair_layers(layers)

    event = Event(evt_dir)
    event.read(evtid)
    barrel_hits = event.filter_hits(layers)

    data_source = 'doublets_from_cuts'
    cfg = config[data_source]
    pairs_selected_dir = cfg['selected']
    pairs_input_dir = os.path.join(pairs_selected_dir, 'evt{}'.format(evtid))

    all_segments = []
    for pair_id in sel_layer_id:
        file_name = os.path.join(pairs_input_dir,
                                 'pair{:03d}.h5'.format(pair_id))
        # Skip layer pairs whose HDF5 store lacks the 'data' key.
        try:
            with pd.HDFStore(file_name, 'r') as store:
                df = store.get('data')
        except KeyError:
            pass
        else:
            all_segments.append(df)

    segments = pd.concat(all_segments, ignore_index=True)
    graph = utils_data.segments_to_nx(barrel_hits,
                                      segments,
                                      sender_hitid_name='hit_id_in',
                                      receiver_hitid_name='hit_id_out',
                                      solution_name='true',
                                      use_digraph=True,
                                      bidirection=False)
    track_cands = wrangler.get_tracks(graph,
                                      feature_name='solution',
                                      with_fit=False)
    df_track_cands = analysis.graphs_to_df(track_cands)
    summary = analysis.summary_on_prediction(graph,
                                             barrel_hits,
                                             df_track_cands,
                                             matching_cut=matching_cut)

    true_nhits = barrel_hits[barrel_hits.particle_id > 0].groupby(
        'particle_id')['hit_id'].count()
    true_particle_ids = true_nhits[true_nhits > 2].index.to_numpy()

    particles = event.particles
    pT_all = particles[particles.particle_id.isin(
        true_particle_ids)].pt.to_numpy()
    pT_sel = particles[particles.particle_id.isin(
        summary['correct_pids'])].pt.to_numpy()
    return pT_all, pT_sel
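
A minimal usage sketch (the event id, config path, and pT binning below are hypothetical; it assumes the selected doublet files for that event already exist on disk):

import numpy as np

# Hypothetical inputs: event 1000 and a local heptrkx-style config.
pT_all, pT_sel = track_eff_of_edge_selected(1000, 'configs/doublets.yaml')

# Per-bin tracking efficiency: matched particles over all true particles.
bins = np.linspace(0., 5., 21)
n_all, _ = np.histogram(pT_all, bins=bins)
n_sel, _ = np.histogram(pT_sel, bins=bins)
eff = np.divide(n_sel, n_all, out=np.zeros(len(n_all)), where=n_all > 0)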
Example 2
def setup_from_config(self, config_dir):
    config = load_yaml(config_dir)
    evt_dir = config['track_ml']['dir']
    layers = config['doublets_from_cuts']['layers']
    phi_slope_max = config['doublets_from_cuts']['phi_slope_max']
    z0_max = config['doublets_from_cuts']['z0_max']
    min_hits = config['doublets_from_cuts']['min_hits']
    base_outdir = config['doublets_from_cuts']['selected']
    self.setup(evt_dir, phi_slope_max, z0_max, layers, min_hits,
               base_outdir)
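
A usage sketch; the snippet does not show the class that owns this method, so the DoubletsSelector name below is a hypothetical stand-in:

# Hypothetical: any object exposing setup() with this signature.
selector = DoubletsSelector()
selector.setup_from_config('configs/doublets.yaml')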
Example 3
import numpy as np

# load_yaml and Event are helpers from the heptrkx package.


def fraction_of_duplicated_hits(evtid, config_name):
    """Count duplicated hits per particle.

    Despite the name, this returns an array of per-particle counts
    (hits beyond the first on the same layer), not a fraction.
    """
    config = load_yaml(config_name)
    evt_dir = config['track_ml']['dir']
    layers = config['doublets_from_cuts']['layers']

    event = Event(evt_dir, evtid)
    barrel_hits = event.filter_hits(layers)

    # remove noise hits
    barrel_hits = barrel_hits[barrel_hits.particle_id > 0]

    # hits per particle minus distinct layers hit = duplicated hits
    sel = barrel_hits.groupby("particle_id")['layer'].apply(
        lambda x: len(x) - np.unique(x).shape[0]).values
    return sel
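
A usage sketch (event id and config path are hypothetical). Since the function returns per-particle counts rather than a fraction, the caller derives the fraction:

import numpy as np

dup_counts = fraction_of_duplicated_hits(1000, 'configs/doublets.yaml')
# Fraction of particles with at least one duplicated layer hit.
frac = np.mean(dup_counts > 0)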
Example 4
def main():
    """Main function"""

    # Parse the command line
    args = parse_args()
    # Initialize MPI
    rank, n_ranks = init_workers(args.distributed)

    # Load configuration
    from heptrkx import load_yaml
    config = load_yaml(args.config)
    output_dir = config.get('output_dir', None)
    if output_dir is not None:
        # expandvars requires a string, so only call it when the key is set.
        output_dir = os.path.expandvars(output_dir)
    if rank == 0:
        os.makedirs(output_dir, exist_ok=True)
    else:
        output_dir = None

    # Setup logging
    config_logging(verbose=args.verbose, output_dir=output_dir)
    logging.info('Initialized rank %i out of %i', rank, n_ranks)
    if args.show_config and (rank == 0):
        logging.info('Command line config: %s', args)
    if rank == 0:
        logging.info('Configuration: %s', config)
        logging.info('Saving job outputs to %s', output_dir)

    # Load the datasets
    train_data_loader, valid_data_loader = get_data_loaders(
        distributed=args.distributed, **config['data'])
    logging.info('Loaded %g training samples', len(train_data_loader.dataset))
    if valid_data_loader is not None:
        logging.info('Loaded %g validation samples',
                     len(valid_data_loader.dataset))

    # Load the trainer
    trainer = get_trainer(distributed=args.distributed,
                          output_dir=output_dir,
                          device=args.device,
                          **config['trainer'])
    # Build the model and optimizer
    trainer.build_model(**config.get('model', {}))
    if rank == 0:
        trainer.print_model_summary()

    # Run the training
    summary = trainer.train(train_data_loader=train_data_loader,
                            valid_data_loader=valid_data_loader,
                            **config['training'])
    if rank == 0:
        trainer.write_summaries()

    # Print some conclusions
    n_train_samples = len(train_data_loader.sampler)
    logging.info('Finished training')
    train_time = np.mean(summary['train_time'])
    logging.info('Train samples %g time %g s rate %g samples/s',
                 n_train_samples, train_time, n_train_samples / train_time)
    if valid_data_loader is not None:
        n_valid_samples = len(valid_data_loader.sampler)
        valid_time = np.mean(summary['valid_time'])
        logging.info('Valid samples %g time %g s rate %g samples/s',
                     n_valid_samples, valid_time, n_valid_samples / valid_time)

    # Drop to IPython interactive shell
    if args.interactive and (rank == 0):
        logging.info('Starting IPython interactive session')
        import IPython
        IPython.embed()

    if rank == 0:
        logging.info('All done!')
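
For reference, a hypothetical invocation of this entry point; the script name and flag spellings are assumptions inferred from the args attributes used above:

# python train.py configs/train.yaml --distributed --verbose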
Example 5
def create_evaluator(config_name, iteration, input_ckpt=None):
    """
    @config: configuration for train_nx_graph
    """
    # load configuration file
    all_config = load_yaml(config_name)
    config = all_config['segment_training']
    config_tr = config['parameters']

    batch_size = n_graphs = config_tr['batch_size']  # need optimization
    num_processing_steps_tr = config_tr['n_iters']  # number of message-passing steps
    prod_name = config['prod_name']
    if input_ckpt is None:
        input_ckpt = os.path.join(config['output_dir'], prod_name)

    # generate inputs
    generate_input_target = inputs_generator(
        all_config['make_graph']['out_graph'], n_train_fraction=0.8)

    # build TF graph
    tf.compat.v1.reset_default_graph()
    model = get_model(config['model_name'])

    input_graphs, target_graphs = generate_input_target(n_graphs)
    input_ph = utils_tf.placeholders_from_data_dicts(
        input_graphs, force_dynamic_num_graphs=True)
    target_ph = utils_tf.placeholders_from_data_dicts(
        target_graphs, force_dynamic_num_graphs=True)

    output_ops_tr = model(input_ph, num_processing_steps_tr)
    # Defensive close of a pre-existing session (a notebook idiom; inside
    # this function the name is always unbound, so this is a no-op).
    try:
        sess.close()
    except NameError:
        pass

    # Use the TF1-compat API consistently with reset_default_graph() above.
    sess = tf.compat.v1.Session()
    saver = tf.compat.v1.train.Saver()
    if iteration < 0:
        saver.restore(sess, tf.train.latest_checkpoint(input_ckpt))
    else:
        # NOTE: ckpt_name is a module-level checkpoint-file template that is
        # not shown in this snippet.
        saver.restore(sess,
                      os.path.join(input_ckpt, ckpt_name.format(iteration)))

    def evaluator(input_graphs,
                  target_graphs,
                  use_digraph=False,
                  bidirection=False):
        """
        input is graph tuples, sizes should match batch_size
        """
        feed_dict = {input_ph: input_graphs, target_ph: target_graphs}
        predictions = sess.run(
            {"outputs": output_ops_tr, "target": target_ph},
            feed_dict=feed_dict)
        output = predictions['outputs'][-1]

        return utils_data.predicted_graphs_to_nxs(output,
                                                  input_graphs,
                                                  target_graphs,
                                                  use_digraph=use_digraph,
                                                  bidirection=bidirection)

    return evaluator
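
A usage sketch, reusing the same inputs_generator call that create_evaluator makes internally; the config path, graph directory, and batch size are hypothetical:

# Restore the latest checkpoint (iteration < 0) and evaluate one batch.
evaluate = create_evaluator('configs/train_nx_graph.yaml', iteration=-1)

generate_input_target = inputs_generator('graphs/out_graph',
                                         n_train_fraction=0.8)
input_graphs, target_graphs = generate_input_target(2)  # = batch_size
nx_graphs = evaluate(input_graphs, target_graphs)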
Example 6
    add_arg('--ckpt',
            default='trained_results/nxgraph_big_test_NOINT/bak',
            help='path that stores checkpoint')
    add_arg(
        '--trkml',
        default='/global/cscratch1/sd/xju/heptrkx/trackml_inputs/train_all',
        help='original tracking data')

    args = parser.parse_args()
    config_file = args.train_config
    input_ckpt = args.ckpt
    iteration = args.nEpoch

    # create the model
    model = create_evaluator(config_file, iteration, input_ckpt)
    config = load_yaml(config_file)

    # prepare for data
    file_dir = config['data']['output_nxgraph_dir']
    base_dir = os.path.join(file_dir, "event00000{}_g{:09d}_INPUT.npz")
    evtid = args.evt
    isec = args.sec

    file_names = []
    section_list = []
    if isec < 0:
        # Replace the formatted section field with a wildcard and glob to
        # count how many sections this event has.
        section_pattern = base_dir.format(evtid, 0).replace(
            '_g{:09d}'.format(0), '*')
        n_sections = len(glob.glob(section_pattern))
        file_names = [base_dir.format(evtid, ii) for ii in range(n_sections)]
        section_list = list(range(n_sections))