Example #1
def reduce(x,
           coords,
           energy,
           dist,
           nidx,
           rs,
           t_idx,
           t_spectator_weight,
           threshold=0.5,
           print_reduction=True,
           name='reduce',
           trainable=True,
           use_edges=True,
           return_backscatter=False):

    from GravNetLayersRagged import SelectFromIndices, AccumulateNeighbours

    gnidx, gsel, bg, srs = reduce_indices(
        x,
        dist,
        nidx,
        rs,
        t_idx,
        threshold=threshold,
        name=name + '_indices',
        trainable=trainable,
        print_reduction=print_reduction,
        use_edges=use_edges,
        return_backscatter=return_backscatter)

    #these are needed in reduced form
    t_idx, t_spectator_weight = SelectFromIndices()(
        [gsel, t_idx, t_spectator_weight])

    #accumulate over each group, then keep one representative entry per group
    coords = AccumulateNeighbours('mean')([coords, gnidx])
    coords = SelectFromIndices()([gsel, coords])
    energy = AccumulateNeighbours('sum')([energy, gnidx])
    energy = SelectFromIndices()([gsel, energy])
    x = AccumulateNeighbours('minmeanmax')([x, gnidx])
    x = SelectFromIndices()([gsel, x])

    rs = srs  #set new row splits

    return x, coords, energy, rs, bg, t_idx, t_spectator_weight
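A minimal usage sketch; every tensor passed in here is a hypothetical placeholder (per-hit features, a KNN output and truth tensors produced earlier in the model):

#sketch only, not part of the original example; all inputs are placeholders
x, coords, energy, rs, bg, t_idx, t_spectator_weight = reduce(
    x, coords, energy, dist, nidx, rs, t_idx, t_spectator_weight,
    threshold=0.5, print_reduction=True, name='reduce_block_1')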
Example #2

#print(neighbour_idxs,'\n', row_splits, '\n',hier)

hier = gl_hier
coords = gl_coords
row_splits = gl_row_splits
gidx = global_idxs
scatters = []

bglayer = MultiBackGather()
for i in range(10):

    row_splits, sel, backscatter = applyClustering(K, coords, hier, row_splits)
    bglayer.append(backscatter)
    #scatters.append(backscatter)

    hier, coords, gidx = SelectFromIndices()([sel, hier, coords, gidx])

    coords /= 2.  #shift stuff closer together
    print('row_splits', row_splits.numpy())
    print('coords', coords.shape)
    print('hier', hier.shape)

    #sel_gidx = gidx
    #for k in range(len(scatters)):
    #    l = len(scatters) - k - 1
    #    print('scatters[l]',scatters[l].shape)
    #    sel_gidx = tf.gather_nd(sel_gidx, scatters[l] )
    #    print('scattered to', sel_gidx.shape)

    sel_gidx = bglayer(gidx)
    print('scat to', sel_gidx.shape)
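For reference, the row splits printed above delimit per-event slices of the flat hit axis; a minimal self-contained sketch (values are illustrative):

import tensorflow as tf

#hits of event i live at [row_splits[i]:row_splits[i+1]]
row_splits = tf.constant([0, 3, 7], dtype='int32')  #two events: 3 and 4 hits
hits = tf.range(7)
event0 = hits[row_splits[0]:row_splits[1]]  # -> [0, 1, 2]
event1 = hits[row_splits[1]:row_splits[2]]  # -> [3, 4, 5, 6]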
Example #3
def pre_selection_staged(
    indict,
    debug_outdir,
    trainable,
    name='pre_selection_add_stage_0',
    debugplots_after=-1,
    reduction_threshold=0.75,
    use_edges=True,
    print_info=False,
    record_metrics=False,
    n_coords=3,
    edge_nodes_0=16,
    edge_nodes_1=8,
):
    '''
    Takes the output of the preselection model and selects again; the outputs
    are key-compatible with the inputs, so this stage can be chained.

    This one uses full-blown GravNet.

    Gets as inputs:
    
    indict['scatterids']
    indict['orig_t_idx'] 
    indict['orig_t_energy'] 
    indict['orig_dim_coords']
    indict['rs']
    indict['orig_row_splits']
    
    indict['features']
    indict['unproc_features']
    indict['coords']
    indict['phys_coords']
    indict['addfeat']
    indict['energy']
    indict['not_noise_score']
    
    
    indict['t_idx']
    indict['t_energy']
    ... all the truth info
    
    '''

    from tensorflow.keras.layers import Dense, Concatenate, Add
    from GravNetLayersRagged import RaggedGravNet, DistanceWeightedMessagePassing, ElementScaling
    from GravNetLayersRagged import SelectFromIndices, GooeyBatchNorm, MaskTracksAsNoise
    from GravNetLayersRagged import AccumulateNeighbours, KNN, MultiAttentionGravNetAdd
    from GravNetLayersRagged import SelectFeatures  #assumed to live here alongside the other helpers
    from LossLayers import LLClusterCoordinates, LLFillSpace
    from DebugLayers import PlotCoordinates
    from MetricsLayers import MLReductionMetrics
    from Regularizers import MeanMaxDistanceRegularizer, AverageDistanceRegularizer

    #assume the inputs are normalised
    rs = indict['rs']
    t_idx = indict['t_idx']

    track_charge = SelectFeatures(2, 3)(
        indict['unproc_features'])  #zero for calo hits
    x = Concatenate()([indict['features'], indict['addfeat']])
    x = Dense(64, activation='elu', trainable=trainable)(x)
    gn_pre_coords = indict['coords']
    gn_pre_coords = ElementScaling(name=name + 'es1',
                                   trainable=trainable)(gn_pre_coords)
    x = Concatenate()([gn_pre_coords, x])

    x, coords, nidx, dist = RaggedGravNet(n_neighbours=32,
                                          n_dimensions=n_coords,
                                          n_filters=64,
                                          n_propagate=64,
                                          coord_initialiser_noise=1e-5,
                                          feature_activation=None,
                                          record_metrics=record_metrics,
                                          use_approximate_knn=True,
                                          use_dynamic_knn=True,
                                          trainable=trainable,
                                          name=name + '_gn1')([x, rs])

    #the two below are mostly running to record metrics and kill very bad coordinate scalings
    dist = MeanMaxDistanceRegularizer(strength=1e-6 if trainable else 0.,
                                      record_metrics=record_metrics)(dist)

    dist = AverageDistanceRegularizer(strength=1e-6 if trainable else 0.,
                                      record_metrics=record_metrics)(dist)

    if debugplots_after > 0:
        coords = PlotCoordinates(debugplots_after,
                                 outdir=debug_outdir,
                                 name=name + '_gn1_coords')(
                                     [coords, indict['energy'], t_idx, rs])

    x = DistanceWeightedMessagePassing([32, 32, 8, 8],
                                       name=name + 'dmp1',
                                       trainable=trainable)([x, nidx, dist])

    x_matt = Dense(16, activation='elu', name=name + '_matt_dense')(x)

    x_matt = MultiAttentionGravNetAdd(5,
                                      name=name + '_att_gn1',
                                      record_metrics=record_metrics)(
                                          [x, x_matt, coords, nidx])
    x = Concatenate()([x, x_matt])
    x = Dense(64, activation='elu', name=name + '_bef_coord_dense')(x)

    coords = Add()([
        Dense(n_coords,
              name=name + '_coord_add_dense',
              kernel_initializer='zeros')(x), coords
    ])
    if debugplots_after > 0:
        coords = PlotCoordinates(debugplots_after,
                                 outdir=debug_outdir,
                                 name=name + '_red_coords')(
                                     [coords, indict['energy'], t_idx, rs])

    nidx, dist = KNN(
        K=16,
        radius='dynamic',  #use dynamic feature
        record_metrics=record_metrics,
        name=name + '_knn',
        min_bins=[7, 7]  #this can be fine grained
    )([coords, rs])

    coords = LLClusterCoordinates(print_loss=print_info,
                                  record_metrics=record_metrics,
                                  active=trainable,
                                  print_batch_time=False,
                                  scale=5.)([coords, t_idx, rs])

    coords = LLFillSpace(
        active=trainable,
        record_metrics=record_metrics,
        scale=0.025,  #just mild
        runevery=-1,  #give it a kick only every now and then - that's enough
    )([coords, rs])

    unred_rs = rs

    cluster_tidx = MaskTracksAsNoise(active=trainable)([t_idx, track_charge])

    gnidx, gsel, group_backgather, rs = reduce_indices(
        x,
        dist,
        nidx,
        rs,
        cluster_tidx,
        threshold=reduction_threshold,
        print_reduction=print_info,
        trainable=trainable,
        name=name + '_reduce_indices',
        use_edges=use_edges,
        edge_nodes_0=edge_nodes_0,
        edge_nodes_1=edge_nodes_1,
        return_backscatter=False)

    gsel = MLReductionMetrics(name=name + '_reduction', record_metrics=True)(
        [gsel, t_idx, indict['t_energy'], unred_rs, rs])

    selfeat = SelectFromIndices()([gsel, indict['features']])
    unproc_features = SelectFromIndices()([gsel, indict['unproc_features']])

    energy = indict['energy']

    x = AccumulateNeighbours('minmeanmax')([x, gnidx, energy])
    x = SelectFromIndices()([gsel, x])
    #add more useful things
    coords = AccumulateNeighbours('mean')([coords, gnidx, energy])
    coords = SelectFromIndices()([gsel, coords])
    phys_coords = AccumulateNeighbours('mean')(
        [indict['phys_coords'], gnidx, energy])
    phys_coords = SelectFromIndices()([gsel, phys_coords])

    energy = AccumulateNeighbours('sum')([energy, gnidx])
    energy = SelectFromIndices()([gsel, energy])

    out = {}
    out['not_noise_score'] = AccumulateNeighbours('mean')(
        [indict['not_noise_score'], gnidx])
    out['not_noise_score'] = SelectFromIndices()(
        [gsel, out['not_noise_score']])

    out['scatterids'] = indict['scatterids'] + [group_backgather]  #append the new selection step

    #re-build standard feature layout
    out['features'] = selfeat
    out['unproc_features'] = unproc_features
    out['coords'] = coords
    out['phys_coords'] = phys_coords
    out['addfeat'] = GooeyBatchNorm(name=name + '_gooey_norm',
                                    trainable=trainable)(x)  #norm them
    out['energy'] = energy
    out['rs'] = rs

    for k in indict.keys():
        if k.startswith('t_'):
            out[k] = SelectFromIndices()([gsel, indict[k]])

    #some pass throughs:
    out['orig_dim_coords'] = indict['orig_dim_coords']
    out['orig_t_idx'] = indict['orig_t_idx']
    out['orig_t_energy'] = indict['orig_t_energy']
    out['orig_row_splits'] = indict['orig_row_splits']

    #check
    anymissing = False
    for k in indict.keys():
        if k not in out:
            anymissing = True
            print(k, 'missing')
    if anymissing:
        raise ValueError("key not found")

    return out
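Since the returned dictionary is key-compatible with its input, stages can be chained behind the full preselection model (Example #4 below); a sketch with illustrative arguments:

#sketch only; argument values are placeholders
out = pre_selection_model_full(orig_inputs, debug_outdir='plots', trainable=True)
out = pre_selection_staged(out, 'plots', trainable=True,
                           name='pre_selection_add_stage_0')
out = pre_selection_staged(out, 'plots', trainable=True,
                           name='pre_selection_add_stage_1')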
Example #4
def pre_selection_model_full(
    orig_inputs,
    debug_outdir='',
    trainable=False,
    name='pre_selection',
    debugplots_after=-1,
    reduction_threshold=0.75,
    noise_threshold=0.025,
    use_edges=True,
    n_coords=3,
    pass_through=False,
    print_info=False,
    record_metrics=False,
    omit_reduction=False,  #if True, stop after the coordinate transform; useful for the pretrain phase
    use_multigrav=True,
    eweighted=True,
):

    from tensorflow.keras.layers import Dense, Concatenate
    from GravNetLayersRagged import AccumulateNeighbours, SelectFromIndices
    from GravNetLayersRagged import SortAndSelectNeighbours, NoiseFilter
    from GravNetLayersRagged import CastRowSplits, ProcessFeatures
    from GravNetLayersRagged import GooeyBatchNorm, MaskTracksAsNoise
    from GravNetLayersRagged import SelectFeatures, OnesLike  #assumed to live here
    from DebugLayers import PlotCoordinates
    from LossLayers import LLClusterCoordinates, LLNotNoiseClassifier, LLFillSpace
    from MetricsLayers import MLReductionMetrics

    rs = CastRowSplits()(orig_inputs['row_splits'])
    t_idx = orig_inputs['t_idx']

    orig_processed_features = ProcessFeatures()(orig_inputs['features'])
    x = orig_processed_features
    energy = SelectFeatures(0, 1)(orig_inputs['features'])
    coords = SelectFeatures(5, 8)(x)
    track_charge = SelectFeatures(2, 3)(
        orig_inputs['features'])  #zero for calo hits
    phys_coords = coords

    # here the actual network starts
    if debugplots_after > 0:
        coords = PlotCoordinates(debugplots_after,
                                 outdir=debug_outdir,
                                 name=name +
                                 '_initial')([coords, energy, t_idx, rs])
    ############## Keep this part to reload the noise filter with pre-trained weights for other trainings

    out = {}
    if pass_through:  #do nothing but make output compatible
        for k in orig_inputs.keys():
            out[k] = orig_inputs[k]
        out['features'] = x
        out['coords'] = coords
        out['addfeat'] = x  #additional features; same as 'features' in pass-through mode
        out['energy'] = energy
        out['not_noise_score'] = Dense(1, name=name + '_passthrough_noise')(x)
        out['orig_t_idx'] = orig_inputs['t_idx']
        out['orig_t_energy'] = orig_inputs['t_energy']  #for validation
        out['orig_dim_coords'] = coords
        out['rs'] = rs
        out['orig_row_splits'] = rs
        return out

    #this takes O(200ms) for 100k hits
    coords, nidx, dist, x = first_coordinate_adjustment(
        coords,
        x,
        energy,
        rs,
        t_idx,
        debug_outdir,
        trainable=trainable,
        name=name + '_first_coords',
        debugplots_after=debugplots_after,
        n_coords=n_coords,
        record_metrics=record_metrics,
        use_multigrav=use_multigrav)
    #create the gradients
    coords = LLClusterCoordinates(print_loss=trainable and print_info,
                                  active=trainable,
                                  print_batch_time=False,
                                  record_metrics=record_metrics,
                                  scale=5.)([coords, t_idx, rs])

    if debugplots_after > 0:
        coords = PlotCoordinates(debugplots_after,
                                 outdir=debug_outdir,
                                 name=name +
                                 '_bef_red')([coords, energy, t_idx, rs])

    if omit_reduction:
        return {'coords': coords, 'dist': dist, 'x': x}

    dist, nidx = SortAndSelectNeighbours(K=16)(
        [dist, nidx])  #only run reduction on the 16 closest neighbours
    '''
    run a full reduction block
    return the noise score in addition - don't select yet
    
    do not cluster tracks with anything here
    '''

    cluster_tidx = MaskTracksAsNoise(active=trainable)([t_idx, track_charge])

    unred_rs = rs
    gnidx, gsel, group_backgather, rs = reduce_indices(
        x,
        dist,
        nidx,
        rs,
        cluster_tidx,
        threshold=reduction_threshold,
        print_reduction=print_info,
        trainable=trainable,
        name=name + '_reduce_indices',
        use_edges=use_edges,
        record_metrics=record_metrics,
        return_backscatter=False)

    gsel = MLReductionMetrics(name=name + '_reduction_0',
                              record_metrics=record_metrics)([
                                  gsel, t_idx, orig_inputs['t_energy'],
                                  unred_rs, rs
                              ])

    #do it explicitly

    #selfeat = orig_inputs['features']
    selfeat = SelectFromIndices()([gsel, orig_processed_features])
    unproc_features = SelectFromIndices()([gsel, orig_inputs['features']])

    #save for later
    orig_dim_coords = coords

    energy_weight = energy
    if not eweighted:
        energy_weight = OnesLike()(energy)

    x = AccumulateNeighbours('minmeanmax')([x, gnidx, energy_weight])
    x = SelectFromIndices()([gsel, x])
    #add more useful things
    coords = AccumulateNeighbours('mean')([coords, gnidx, energy_weight])
    coords = SelectFromIndices()([gsel, coords])

    phys_coords = AccumulateNeighbours('mean')(
        [phys_coords, gnidx, energy_weight])
    phys_coords = SelectFromIndices()([gsel, phys_coords])

    energy = AccumulateNeighbours('sum')([energy, gnidx])
    energy = SelectFromIndices()([gsel, energy])

    #re-build standard feature layout
    out['features'] = selfeat
    out['unproc_features'] = unproc_features
    out['coords'] = coords
    out['phys_coords'] = phys_coords
    out['addfeat'] = GooeyBatchNorm(trainable=trainable)(x)  #norm them
    out['energy'] = energy

    ## all the truth
    for k in orig_inputs.keys():
        if k.startswith('t_'):
            out[k] = SelectFromIndices()([gsel, orig_inputs[k]])

    #debug
    if debugplots_after > 0:
        out['coords'] = PlotCoordinates(debugplots_after,
                                        outdir=debug_outdir,
                                        name=name + '_after_red')([
                                            out['coords'], out['energy'],
                                            out['t_idx'], rs
                                        ])

    ######## below is noise classifier

    #this does not work, but also might not be an issue for the studies
    #out['backscatter']=bg

    isnotnoise = Dense(
        1,
        activation='sigmoid',
        trainable=trainable,
        name=name + '_noisescore_d1',
    )(Concatenate()([out['addfeat'], out['coords']]))
    isnotnoise = LLNotNoiseClassifier(
        print_loss=trainable and print_info,
        scale=1.,
        active=trainable,
        record_metrics=record_metrics,
    )([isnotnoise, out['t_idx']])

    unred_rs = rs
    sel, rs, noise_backscatter = NoiseFilter(
        threshold=noise_threshold,  #high signal efficiency filter
        print_reduction=print_info,
        record_metrics=record_metrics)([isnotnoise, rs])

    out['not_noise_score'] = isnotnoise

    for k in out.keys():
        out[k] = SelectFromIndices()([sel, out[k]])

    out['coords'] = LLFillSpace(
        print_loss=trainable and print_info,
        active=trainable,
        record_metrics=record_metrics,
        scale=0.025,  #just mild
        runevery=-1,  #give it a kick only every now and then - that's enough
    )([out['coords'], rs])

    out['scatterids'] = [group_backgather,
                         noise_backscatter]  #add them here directly
    out['orig_t_idx'] = orig_inputs['t_idx']
    out['orig_t_energy'] = orig_inputs['t_energy']  #for validation
    out['orig_dim_coords'] = orig_dim_coords
    out['rs'] = rs
    out['orig_row_splits'] = orig_inputs['row_splits']
    '''
    So we have the following outputs at this stage:

    out['scatterids']
    out['orig_t_idx']
    out['orig_t_energy']
    out['orig_dim_coords']
    out['rs']
    out['orig_row_splits']

    out['features']
    out['unproc_features']
    out['coords']
    out['phys_coords']
    out['addfeat']
    out['energy']
    out['not_noise_score']

    ... plus all selected truth ('t_*') arrays
    '''

    return out
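The 'scatterids' list records the two selection steps (group reduction and noise filter), so reduced-level quantities can be scattered back to the original hits. A sketch following the MultiBackGather usage from Example #2 (the exact call semantics are assumed from that example, not verified here):

#sketch only
bg = MultiBackGather()
for sid in out['scatterids']:
    bg.append(sid)
per_hit_score = bg(out['not_noise_score'])  #back to one entry per original hit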
Example #5
    def metrics_call(self, inputs):
        assert len(inputs) == 5
        gsel, tidx, ten, rs, srs = inputs
        #tf.assert_equal(tidx.shape,ten.shape)#safety

        alltruthcount = None
        seltruthcount = None
        nonoisecounts_bef = []
        nonoisecounts_after = []

        if rs.shape[0] is None:
            return  #shapes not known yet (graph-building phase)

        if not self.active:
            return  #nothing to record

        stidx, sten = SelIdx.raw_call(gsel, [tidx, ten])
        #count distinct truth objects per event, before and after selection
        for i in tf.range(rs.shape[0] - 1):
            u, _, c = tf.unique_with_counts(tidx[rs[i]:rs[i + 1], 0])
            nonoisecounts_bef.append(c[u >= 0])
            if alltruthcount is None:
                alltruthcount = u.shape[0]
            else:
                alltruthcount += u.shape[0]

            u, _, c = tf.unique_with_counts(stidx[srs[i]:srs[i + 1], 0])
            nonoisecounts_after.append(c[u >= 0])
            if seltruthcount is None:
                seltruthcount = u.shape[0]
            else:
                seltruthcount += u.shape[0]

        nonoisecounts_bef = tf.concat(nonoisecounts_bef, axis=0)
        nonoisecounts_after = tf.concat(nonoisecounts_after, axis=0)

        lostfraction = 1. - tf.cast(seltruthcount, dtype='float32') / (tf.cast(
            alltruthcount, dtype='float32'))
        self.add_prompt_metric(lostfraction, self.name + '_lost_objects')
        #done with fractions

        #for simplicity assume that no energy is an exact duplicate (definitely good enough here)

        ue, _ = tf.unique(ten[:, 0])
        uesel, _ = tf.unique(sten[:, 0])

        #energies present both before and after appear twice; lost ones only once
        allen = tf.concat([ue, uesel], axis=0)
        ue, _, c = tf.unique_with_counts(allen)

        lostenergies = ue[c < 2]

        self.add_prompt_metric(tf.reduce_mean(nonoisecounts_bef),
                               self.name + '_hits_pobj_bef_mean')
        self.add_prompt_metric(tf.reduce_max(nonoisecounts_bef),
                               self.name + '_hits_pobj_bef_max')

        self.add_prompt_metric(tf.reduce_mean(nonoisecounts_after),
                               self.name + '_hits_pobj_after_mean')
        self.add_prompt_metric(tf.reduce_max(nonoisecounts_after),
                               self.name + '_hits_pobj_after_max')

        self.add_prompt_metric(tf.reduce_mean(lostenergies),
                               self.name + '_lost_energy_mean')
        self.add_prompt_metric(tf.reduce_max(lostenergies),
                               self.name + '_lost_energy_max')

        reduced_to_fraction = tf.cast(srs[-1], dtype='float32') / tf.cast(
            rs[-1], dtype='float32')
        self.add_prompt_metric(reduced_to_fraction, self.name + '_reduction')
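The lost-energy bookkeeping above relies on a counting trick: concatenating the unique truth energies from before and after the selection, any energy that appears only once was lost. A standalone sketch:

import tensorflow as tf

before = tf.constant([1.5, 2.0, 3.5, 4.0])  #unique energies before selection
after = tf.constant([1.5, 3.5])             #unique energies after selection

u, _, c = tf.unique_with_counts(tf.concat([before, after], axis=0))
lost = u[c < 2]  # -> [2.0, 4.0]; energies seen only once were removed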