def raw_loss(score, nidx, tidx, specweight):
    """Neighbour-wise binary cross-entropy: "has the same truth index as the probe".

    Shapes follow the annotations of the original code and are not enforced
    here: ``score`` V x K-1 x 1, ``nidx`` V x K, ``tidx`` V x 1,
    ``specweight`` V x 1. Column 0 of ``nidx`` must be the vertex itself
    (asserted below); negative entries of ``nidx`` are treated as padding.

    Returns the mean over vertices of the masked, spectator-down-weighted
    cross-entropy.
    """
    # NOTE(review): SelectWithDefault presumably gathers rows by index and fills
    # padded entries with the given default -- confirm against its definition.
    neigh_truth = SelectWithDefault(nidx, tidx, -1)  # V x K x 1
    # the neighbour list has to start with the self-reference
    tf.assert_equal(tidx, neigh_truth[:, 0])
    # replace every negative truth index by a large negative value (still within
    # int32), so the comparison below is false for all noise
    neigh_truth = tf.where(neigh_truth < 0, -1000000000, neigh_truth)

    valid_neigh = tf.where(nidx >= 0, tf.ones_like(nidx, dtype='float32'), 0.)
    valid_neigh = valid_neigh[:, 1:]  # V x K-1, self-reference dropped

    specweight = tf.clip_by_value(specweight, 0., 1.)
    neigh_spec = SelectWithDefault(nidx, specweight, -1.)
    neigh_spec = neigh_spec[:, 1:, 0]  # V x K-1

    probe_truth = tf.expand_dims(tidx, axis=2)
    is_same = tf.cast(probe_truth == neigh_truth[:, 1:, :],
                      dtype='float32')  # V x K-1 x 1

    bce = tf.keras.losses.binary_crossentropy(is_same, score)  # V x K-1
    bce = bce * valid_neigh
    # spectator neighbours are reduced, not removed
    bce = bce * (1. - 0.9 * neigh_spec)

    per_vertex = tf.math.divide_no_nan(tf.reduce_sum(bce, axis=1),
                                       tf.reduce_sum(valid_neigh, axis=1))  # V
    # same down-weighting for spectator probes
    per_vertex = per_vertex * (1. - 0.9 * specweight[:, 0])
    return tf.reduce_mean(per_vertex)
def _rs_loop(coords, tidx):
    """Attractive + repulsive coordinate loss around per-object mean positions.

    Args:
        coords: vertex coordinates (V x C per the original annotations).
        tidx: truth index per vertex (V x 1); negative values are noise and
            are down-weighted in the repulsive term.

    Returns:
        Tuple ``(distloss + reploss, distloss, reploss)``. If ``CreateMidx``
        reports no objects, ``(0., 0., 0.)`` is returned as Python floats.
    """
    Msel, M_not, N_per_obj = CreateMidx(tidx, calc_m_not=True)  # N_per_obj: K x 1
    if N_per_obj is None:
        return 0., 0., 0.  # no objects, discard

    N_per_obj = tf.cast(N_per_obj, dtype='float32')
    N_tot = tf.cast(tidx.shape[0], dtype='float32')
    K = tf.cast(Msel.shape[0], dtype='float32')

    padmask_m = SelectWithDefault(Msel, tf.ones_like(coords[:, 0:1]), 0.)  # K x V' x 1
    coords_m = SelectWithDefault(Msel, coords, 0.)  # K x V' x C

    # per-object mean position
    av_coords_m = tf.reduce_sum(coords_m * padmask_m, axis=1)  # K x C
    av_coords_m = tf.math.divide_no_nan(av_coords_m, N_per_obj)  # K x C
    av_coords_m = tf.expand_dims(av_coords_m, axis=1)  # K x 1 x C

    # attraction: log(e * d^2 + 1) of each member to its object mean
    distloss = tf.reduce_sum((av_coords_m - coords_m)**2, axis=2)
    distloss = tf.math.log(tf.math.exp(1.) * distloss + 1.) * padmask_m[:, :, 0]
    distloss = tf.math.divide_no_nan(tf.reduce_sum(distloss, axis=1),
                                     N_per_obj[:, 0])  # K
    distloss = tf.math.divide_no_nan(tf.reduce_sum(distloss), K)

    # repulsion: exp(-d^2) of every non-member vertex to the object mean
    repdist = tf.expand_dims(coords, axis=0) - av_coords_m  # K x V x C
    repdist = tf.reduce_sum(repdist**2, axis=-1, keepdims=True)  # K x V x 1
    reploss = M_not * tf.exp(-repdist)  # K x V x 1
    # downweight noise
    reploss *= tf.expand_dims((1. - 0.9 * tf.cast(tidx < 0, dtype='float32')),
                              axis=0)
    # An object that owns every vertex has N_tot - N_per_obj == 0 and a zero
    # numerator; a plain division would give NaN and poison the sum below.
    reploss = tf.math.divide_no_nan(tf.reduce_sum(reploss, axis=1),
                                    N_tot - N_per_obj)  # K x 1
    reploss = tf.reduce_sum(reploss) / (K + 1e-3)
    return distloss + reploss, distloss, reploss
def AccumulateKnn(distances, features, indices, mean_and_max=True):
    '''
    Accumulate neighbour features weighted by exp(-distances).

    Custom-op outputs, as declared in the original docstring:
    .Output("out_features: float32")
    .Output("out_max_idxs: int32");

    Assumes that neighbour indices can be padded with -1, but not mixed,
    e.g. [1,4,-1,2] needs to be [1,4,2,-1].
    Other than the padding, the indices must be unique.

    Returns either the custom op's result or, on the pure TensorFlow path,
    the tuple (features, None).
    '''
    # compatibility: the op takes weights, callers pass distances
    weights = tf.exp(-distances)

    if gl.acc_ops_use_tf_gradients:
        # pure TensorFlow path
        weights = tf.expand_dims(weights, axis=2)  # V x K x 1
        neigh_feat = SelectWithDefault(indices, features, 0.)  # V x K x F
        weighted = weights * neigh_feat
        feat_mean = tf.reduce_mean(weighted, axis=1)  # V x F
        feat_max = tf.reduce_max(weighted, axis=1)
        if mean_and_max:
            return tf.concat([feat_mean, feat_max], axis=1), None
        return feat_mean, None

    return _accknn_op.AccumulateKnn(distances=weights,
                                    features=features,
                                    indices=indices,
                                    n_moments=0,
                                    mean_and_max=mean_and_max)
def AccumulateLinKnn(weights, features, indices, mean_and_max=True):
    '''
    Accumulate neighbour features with linear weights, i.e. the weights are
    used as given and not as exp(-w) like in AccumulateKnn.

    Returns either the custom op's result or, on the pure TensorFlow path,
    the tuple (features, None).
    '''
    if gl.acc_ops_use_tf_gradients:
        # pure TensorFlow path
        lin_w = tf.expand_dims(weights, axis=2)  # V x K x 1
        neigh_feat = SelectWithDefault(indices, features, 0.)  # V x K x F
        weighted = lin_w * neigh_feat
        feat_mean = tf.reduce_mean(weighted, axis=1)  # V x F
        feat_max = tf.reduce_max(weighted, axis=1)
        if mean_and_max:
            return tf.concat([feat_mean, feat_max], axis=1), None
        return feat_mean, None

    return _accknn_op.AccumulateKnn(distances=weights,
                                    features=features,
                                    indices=indices,
                                    n_moments=0,
                                    mean_and_max=mean_and_max)
def raw_loss(score, nidx, tidxs, specweights):
    """Cross-entropy on a per-vertex "neighbourhood is pure" score.

    Shapes follow the annotations of the original code and are not enforced
    here: ``score`` V x 1, ``nidx`` V x K, ``tidxs`` V x 1,
    ``specweights`` V x 1. Column 0 of ``nidx`` must be the self-reference.

    The target is 1 where more than 90% of the valid neighbours share the
    probe's truth index and 0 otherwise. Non-noise and noise probes are
    averaged separately; the noise average enters with a factor 0.1.
    """
    neigh_truth = SelectWithDefault(nidx, tidxs, -1)[:, :, 0]  # V x K
    # sanity check: the self-reference has to be in the neighbour indices
    tf.assert_equal(tidxs, neigh_truth[:, 0:1])
    # map every negative index to -10, so noise never counts as a match
    neigh_truth = tf.where(neigh_truth < 0, -10, neigh_truth)

    matches = tf.cast(neigh_truth == tidxs, dtype='float32')  # V x K
    valid = tf.where(nidx >= 0, tf.ones_like(nidx, dtype='float32'), 0.)  # V x K

    n_match = tf.reduce_sum(matches, axis=1, keepdims=True)
    n_valid = tf.reduce_sum(valid, axis=1, keepdims=True)
    purity = tf.math.divide_no_nan(n_match, n_valid)  # V x 1

    # cut at 90% same
    target = tf.where(purity > 0.9, 1., purity * 0.)  # V x 1
    bce = tf.keras.losses.binary_crossentropy(target, score)  # V

    # NOTE(review): specweights multiplies the loss directly here -- confirm
    # whether callers pass a spectator flag or an already inverted weight.
    weights = specweights[:, 0]  # V
    not_noise = tf.cast(tidxs >= 0, dtype='float32')[:, 0]  # V
    noise = 1. - not_noise

    obj_term = tf.math.divide_no_nan(
        tf.reduce_sum(weights * not_noise * bce),
        tf.reduce_sum(weights * not_noise))
    noise_term = tf.math.divide_no_nan(tf.reduce_sum(noise * bce),
                                       tf.reduce_sum(noise))
    # noise doesn't really matter so much
    return obj_term + 0.1 * noise_term
def raw_loss(dist, nidxs, tidxs, specweight, print_loss, name):
    """Attractive/repulsive neighbour loss on distances.

    Args:
        dist: distances to the neighbours (same leading shape as ``nidxs``);
            ``tf.sqrt(dist)`` is printed as "dist", so these are presumably
            squared distances -- confirm with the caller.
        nidxs: neighbour indices, negative entries are padding; column 0 is
            used as the probe itself.
        tidxs: truth index per vertex (V x 1), negative means noise.
        specweight: currently unused; spectator masking is switched off
            (``notspecmask`` is fixed to 1). Kept for interface compatibility.
        print_loss: if true, print diagnostic averages.
        name: label used as prefix of the diagnostic print-out.

    Returns:
        Scalar loss: per-probe attraction plus repulsion, averaged over the
        non-noise probes.
    """
    sel_tidxs = SelectWithDefault(nidxs, tidxs, -1)[:, :, 0]
    active = tf.where(nidxs >= 0, tf.ones_like(dist), 0.)
    notspecmask = 1.  # spectator masking disabled

    probe_is_notnoise = tf.cast(tidxs >= 0, dtype='float32')[:, 0]  # V
    notnoisemask = tf.where(sel_tidxs < 0, 0., tf.ones_like(dist))
    # noise neighbours still repel, but only with 1% weight
    notnoiseweight = notnoisemask + (1. - notnoisemask) * 0.01

    sameasprobe = tf.cast(sel_tidxs[:, 0:1] == sel_tidxs, dtype='float32')
    attmask = sameasprobe * notspecmask * active
    repmask = (1. - sameasprobe) * notspecmask * active

    attr = tf.math.log(tf.math.exp(1.) * dist + 1.) * attmask
    rep = tf.exp(-dist) * repmask * notnoiseweight

    nattneigh = tf.reduce_sum(attmask, axis=1)
    nrepneigh = tf.reduce_sum(repmask, axis=1)

    attloss = probe_is_notnoise * tf.reduce_sum(attr, axis=1)
    attloss = tf.math.divide_no_nan(attloss, nattneigh)
    reploss = probe_is_notnoise * tf.reduce_sum(rep, axis=1)
    reploss = tf.math.divide_no_nan(reploss, nrepneigh)

    # noise probes do not actively contribute
    lossval = attloss + reploss
    n_notnoise = tf.reduce_sum(probe_is_notnoise)
    lossval = tf.math.divide_no_nan(
        tf.reduce_sum(probe_is_notnoise * lossval), n_notnoise)

    if print_loss:
        # divide_no_nan keeps the diagnostics finite when every probe is noise
        avattdist = probe_is_notnoise * tf.math.divide_no_nan(
            tf.reduce_sum(attmask * tf.sqrt(dist), axis=1), nattneigh)
        avattdist = tf.math.divide_no_nan(tf.reduce_sum(avattdist), n_notnoise)
        avrepdist = probe_is_notnoise * tf.math.divide_no_nan(
            tf.reduce_sum(repmask * tf.sqrt(dist), axis=1), nrepneigh)
        avrepdist = tf.math.divide_no_nan(tf.reduce_sum(avrepdist), n_notnoise)

        if hasattr(lossval, "numpy"):
            # eager tensors: print plain numbers
            print(
                name,
                'loss',
                lossval.numpy(),
                'mean att neigh',
                tf.reduce_mean(nattneigh).numpy(),
                'mean rep neigh',
                tf.reduce_mean(nrepneigh).numpy(),
                'att',
                tf.reduce_mean(probe_is_notnoise * attloss).numpy(),
                'rep',
                tf.reduce_mean(probe_is_notnoise * reploss).numpy(),
                'dist (same)',
                avattdist.numpy(),
                'dist (other)',
                avrepdist.numpy(),
            )
        else:
            tf.print(name, 'loss', lossval, 'mean att neigh',
                     tf.reduce_mean(nattneigh), 'mean rep neigh',
                     tf.reduce_mean(nrepneigh))
    return lossval
def oc_per_batch_element(
        beta,
        x,
        q_min,
        object_weights,  # V x 1 !!
        truth_idx,
        is_spectator,
        payload_loss,
        S_B=1.,
        payload_weight_function=None,  # receives betas as K x V x 1 as input, and a threshold val
        payload_weight_threshold=0.8,
        use_mean_x=0.,
        cont_beta_loss=False,
        prob_repulsion=False,
        phase_transition=False,
        phase_transition_double_weight=False,
        alt_potential_norm=False,
        cut_payload_beta_gradient=False,
        kalpha_damping_strength=0.):
    '''
    Object-condensation loss terms for one batch element.

    All inputs are V x X, where X can be 1. Negative truth_idx marks noise.

    Only one configuration is implemented: alt_potential_norm,
    prob_repulsion and phase_transition must be true, while
    phase_transition_double_weight, cont_beta_loss and
    payload_weight_function must be unset; anything else raises ValueError.
    payload_weight_threshold is not used.

    Returns (V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen);
    too_much_B_pen is a constant zero in this version.
    '''
    if not alt_potential_norm:
        raise ValueError("not alt_potential_norm not implemented")
    if not prob_repulsion:
        raise ValueError("not prob_repulsion not implemented")
    if not phase_transition:
        raise ValueError("not phase_transition not implemented")
    if phase_transition_double_weight:
        raise ValueError("phase_transition_double_weight not implemented")
    if cont_beta_loss:
        raise ValueError("cont_beta_loss not implemented")
    if payload_weight_function is not None:
        raise ValueError("payload_weight_function not implemented")

    # set all spectators invalid here, everything scales with beta
    beta_in = beta
    beta = tf.clip_by_value(beta, 0., 1. - 1e-4)
    beta *= (1. - is_spectator)
    qraw = tf.math.atanh(beta)**2
    q = qraw + q_min * (1. - is_spectator)  # V x 1

    N = tf.cast(beta.shape[0], dtype='float32')
    # 1 for noise (truth_idx < 0), 0 otherwise. The previous form had the two
    # values swapped, so Noise_pen averaged beta over the non-noise vertices.
    is_noise = tf.where(truth_idx < 0,
                        tf.zeros_like(truth_idx, dtype='float32') + 1.,
                        0.)  # V x 1

    Msel, M_not, N_per_obj = CreateMidx(truth_idx, calc_m_not=True)
    N_per_obj = tf.cast(N_per_obj, dtype='float32')  # K x 1
    K = tf.cast(Msel.shape[0], dtype='float32')

    padmask_m = SelectWithDefault(Msel, tf.zeros_like(beta_in) + 1., 0)  # K x V-obj x 1
    x_m = SelectWithDefault(Msel, x, 0.)  # K x V-obj x C
    beta_m = SelectWithDefault(Msel, beta_in, 0.)  # K x V-obj x 1
    q_m = SelectWithDefault(Msel, q, 0.)  # K x V-obj x 1
    object_weights_m = SelectWithDefault(Msel, object_weights, 0.)

    # condensation point per object: the member with the highest beta
    kalpha_m = tf.argmax(beta_m, axis=1)  # K x 1
    x_kalpha_m = tf.gather_nd(x_m, kalpha_m, batch_dims=1)  # K x C
    if use_mean_x > 0:
        # blend with the q-weighted mean position of the object
        x_kalpha_m_m = tf.reduce_sum(q_m * x_m * padmask_m, axis=1)  # K x C
        x_kalpha_m_m = tf.math.divide_no_nan(
            x_kalpha_m_m, tf.reduce_sum(q_m * padmask_m, axis=1) + 1e-9)
        x_kalpha_m = use_mean_x * x_kalpha_m_m + (1. - use_mean_x) * x_kalpha_m
    if kalpha_damping_strength > 0:
        # damp the gradient flowing into the condensation point coordinates
        x_kalpha_m = kalpha_damping_strength * tf.stop_gradient(x_kalpha_m) + (
            1. - kalpha_damping_strength) * x_kalpha_m

    q_kalpha_m = tf.gather_nd(q_m, kalpha_m, batch_dims=1)  # K x 1
    beta_kalpha_m = tf.gather_nd(beta_m, kalpha_m, batch_dims=1)  # K x 1
    object_weights_kalpha_m = tf.gather_nd(object_weights_m, kalpha_m,
                                           batch_dims=1)  # K x 1

    # attractive potential
    distancesq_m = tf.reduce_sum((tf.expand_dims(x_kalpha_m, axis=1) - x_m)**2,
                                 axis=-1, keepdims=True)  # K x V-obj x 1
    V_att = q_m * tf.expand_dims(q_kalpha_m, axis=1) * distancesq_m  # K x V-obj x 1
    V_att = V_att * tf.expand_dims(object_weights_kalpha_m, axis=1)  # K x V-obj x 1
    V_att = tf.math.divide_no_nan(tf.reduce_sum(padmask_m * V_att, axis=1),
                                  N_per_obj + 1e-9)  # K x 1
    V_att = tf.math.divide_no_nan(tf.reduce_sum(V_att, axis=0), K + 1e-9)  # 1

    # repulsive potential, the bit that needs M_not
    V_rep = tf.expand_dims(x_kalpha_m, axis=1)  # K x 1 x C
    V_rep = V_rep - tf.expand_dims(x, axis=0)  # K x V x C
    V_rep = tf.reduce_sum(V_rep**2, axis=-1, keepdims=True)  # K x V x 1
    V_rep = -2. * tf.math.log(1. - tf.math.exp(-V_rep / 2.) + 1e-5)
    V_rep *= M_not * tf.expand_dims(q, axis=0)  # K x V x 1
    V_rep = tf.reduce_sum(V_rep, axis=1)  # K x 1
    V_rep *= object_weights_kalpha_m * q_kalpha_m  # K x 1
    V_rep = tf.math.divide_no_nan(
        V_rep,
        tf.expand_dims(tf.expand_dims(N, axis=0), axis=0) - N_per_obj + 1e-9)  # K x 1
    V_rep = tf.math.divide_no_nan(tf.reduce_sum(V_rep, axis=0), K + 1e-9)  # 1

    # beta terms
    B_pen = -tf.reduce_sum(padmask_m * 1. / (20. * distancesq_m + 1.), axis=1)  # K x 1
    B_pen += 1.  # remove self-interaction term (just for offset)
    B_pen *= object_weights_kalpha_m * beta_kalpha_m
    B_pen = tf.math.divide_no_nan(B_pen, N_per_obj + 1e-9)  # K x 1
    # now 'standard' 1-beta; another "-> 1, but slower" per object
    B_pen -= 0.2 * object_weights_kalpha_m * tf.math.sqrt(beta_kalpha_m + 1e-6)
    B_pen = tf.math.divide_no_nan(tf.reduce_sum(B_pen, axis=0), K + 1e-9)  # 1

    too_much_B_pen = tf.constant([0.], dtype='float32')

    # mean beta of the noise vertices
    Noise_pen = S_B * tf.math.divide_no_nan(tf.reduce_sum(is_noise * beta_in),
                                            tf.reduce_sum(is_noise))

    # explicit payload weight: atanh(beta)^2, normalised to the per-object
    # maximum; this + 1e-9 might be an issue POSSIBLE FIXME
    p_w = tf.math.atanh(
        padmask_m * tf.clip_by_value(beta_m, 1e-4, 1. - 1e-4))**2  # K x V_perobj x 1
    p_w = tf.math.divide_no_nan(
        p_w, tf.reduce_max(p_w, axis=1, keepdims=True) + 1e-9)
    if cut_payload_beta_gradient:
        p_w = tf.stop_gradient(p_w)

    payload_loss_m = p_w * SelectWithDefault(Msel, payload_loss, 0.)  # K x V_perobj x P
    payload_loss_m = tf.reduce_sum(payload_loss_m, axis=1)
    pll = tf.math.divide_no_nan(payload_loss_m, N_per_obj + 1e-9)  # K x P
    pll = tf.math.divide_no_nan(tf.reduce_sum(pll, axis=0), K + 1e-9)  # P

    return V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen
def SlicingKnn(K : int, coords, row_splits, features_to_bin_on=None, n_bins=None, bin_width=None, return_n_bins: bool=False, min_bins=[3,3]):
    '''
    Perform kNN search with the slicing method.

    @type K: int
    @param K: number of neighbours to search for

    @type coords: tf.Tensor
    @param coords: coordinate tensor

    @type row_splits: tf.Tensor
    @param row_splits: row splits tensor

    @type features_to_bin_on: Tuple[int, int]
    @param features_to_bin_on: indices of the two features to bin on

    @type n_bins: Tuple[int, int]
    @param n_bins: number of bins to split phase space for kNN search

    @type bin_width: Tuple[float, float] or Tuple[tf.Variable, tf.Variable]
    @param bin_width: width of phase-space bins

    @type return_n_bins: bool
    @param return_n_bins: also return the total number of bins used

    @type min_bins: list
    @param min_bins: minimum binning (in 2D), applied when bin_width is given

    Exactly one of n_bins and bin_width has to be specified, otherwise a
    ValueError is raised. Returns (idx, dist), plus the total number of bins
    if return_n_bins is set.
    '''
    # type and value checks of the input parameters
    check_tuple(features_to_bin_on, "features_to_bin_on", int)

    n_features = coords.shape[1]
    first_feat = features_to_bin_on[0]
    second_feat = features_to_bin_on[1]
    if first_feat >= n_features or second_feat >= n_features or first_feat == second_feat:
        raise ValueError("Value error for <features_to_bin_on>!")

    # both missing or both given is an error
    if (n_bins is None) == (bin_width is None):
        raise ValueError("Specify either <n_bins> OR <bin_width> argument but not both!")

    if n_bins is not None:
        check_tuple(n_bins, "n_bins", int)
    else:
        check_tuple(bin_width, "bin_width", (float, tf.Variable),
                    checkValue=not isinstance(bin_width, tf.Variable))

    # keep only the two dimensions used for binning; transposed because
    # tf.map_fn applies fn to each element unstacked on axis 0
    bin_coords = tf.transpose(tf.gather(coords, features_to_bin_on, axis=1))
    r_max = tf.map_fn(tf.math.reduce_max, bin_coords, fn_output_signature=tf.float32)
    r_min = tf.map_fn(tf.math.reduce_min, bin_coords, fn_output_signature=tf.float32)

    # add a safety margin to the phase space for binning
    margin = 0.00001 * (r_max - r_min)
    r_max = r_max + margin
    r_min = r_min - margin
    r_diff = r_max - r_min

    if bin_width is None:
        # cast tuple to Tensor to match the required argument type
        _n_bins = tf.constant(n_bins, dtype=tf.int32)
    else:
        # derive the number of bins from the requested bin width
        if isinstance(bin_width[0], tf.Variable):  # already checked both are the same
            bin_width = tf.concat(
                [tf.expand_dims(a, axis=0) for a in bin_width], axis=0)
        else:
            bin_width = tf.constant(bin_width)
        wanted_bins = tf.cast(tf.math.ceil(tf.multiply(r_diff, 1.0/bin_width)), tf.int32)
        # limit the number of bins to at least min_bins and at most 50x50
        capped_bins = tf.math.minimum(wanted_bins,
                                      tf.constant([50,50], dtype=tf.int32))
        _n_bins = tf.math.maximum(tf.constant(min_bins, dtype=tf.int32),
                                  capped_bins)

    idx, dist = _nknn_op.SlicingKnn(n_neighbours=K,
                                    coords=coords,
                                    row_splits=row_splits,
                                    n_bins=_n_bins,
                                    features_to_bin_on=features_to_bin_on,
                                    coord_min=r_min,
                                    coord_max=r_max)

    # safe guards: self-reference first, indices within range, padding is -1
    guards = [
        tf.assert_equal(tf.range(tf.shape(idx)[0]), idx[:,0]),
        tf.assert_less(idx, row_splits[-1]),
        tf.assert_less(-2, idx),
    ]
    with tf.control_dependencies(guards):
        if gl.knn_ops_use_tf_gradients:
            # recompute the squared distances in TensorFlow
            ncoords = SelectWithDefault(idx, coords, 0.)
            dist = tf.reduce_sum((ncoords[:,0:1,:] - ncoords)**2, axis=2)
            dist = tf.where(idx<0, 0., dist)

        if not return_n_bins:
            return idx, dist
        return idx, dist, tf.reduce_prod(_n_bins)
def oc_per_batch_element(
        beta,
        x,
        q_min,
        object_weights,  # V x 1 !!
        truth_idx,
        is_spectator,
        payload_loss,
        S_B=1.,
        distance_scale=None,
        payload_weight_function=None,  # receives betas as K x V x 1 as input, and a threshold val
        payload_weight_threshold=0.8,
        use_mean_x=0.,
        cont_beta_loss=False,
        prob_repulsion=False,
        phase_transition=False,
        phase_transition_double_weight=False,
        alt_potential_norm=False,
        payload_beta_gradient_damping_strength=0.,
        kalpha_damping_strength=0.,
        beta_gradient_damping=0.,
        soft_q_scaling=True,
        weight_by_q=False,
        repulsion_q_min=-1.,
        super_repulsion=False):
    '''
    Object-condensation loss terms for one batch element.

    All inputs are V x X, where X can be 1. Negative truth_idx marks noise.

    Only one configuration is implemented: alt_potential_norm,
    prob_repulsion and phase_transition must be true, while
    phase_transition_double_weight, cont_beta_loss and
    payload_weight_function must be unset; anything else raises ValueError.
    payload_weight_threshold is not used.

    distance_scale multiplies the distances to the condensation points;
    None is treated as a unit scale.

    Returns (V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen);
    too_much_B_pen is a constant zero in this version.
    '''
    if not alt_potential_norm:
        raise ValueError("not alt_potential_norm not implemented")
    if not prob_repulsion:
        raise ValueError("not prob_repulsion not implemented")
    if not phase_transition:
        raise ValueError("not phase_transition not implemented")
    if phase_transition_double_weight:
        raise ValueError("phase_transition_double_weight not implemented")
    if cont_beta_loss:
        raise ValueError("cont_beta_loss not implemented")
    if payload_weight_function is not None:
        raise ValueError("payload_weight_function not implemented")

    if beta_gradient_damping > 0.:
        beta = beta_gradient_damping * tf.stop_gradient(beta) + (
            1. - beta_gradient_damping) * beta

    # set all spectators invalid here, everything scales with beta
    beta_in = beta
    beta = tf.clip_by_value(beta, 0., 1. - 1e-4)
    beta *= (1. - is_spectator)
    qraw = tf.math.atanh(beta)**2
    if soft_q_scaling:
        qraw = tf.math.atanh(beta / 1.002)**2
        beta = beta_in * (1. - is_spectator)  # no need for clipping
    q = qraw + q_min * (1. - is_spectator)  # V x 1

    if distance_scale is None:
        # The default used to be handed to the gather helper as None;
        # a unit scale leaves all distances unchanged.
        distance_scale = tf.ones_like(beta_in)

    N = tf.cast(beta.shape[0], dtype='float32')
    is_noise = tf.where(truth_idx < 0,
                        tf.zeros_like(truth_idx, dtype='float32') + 1.,
                        0.)  # V x 1

    Msel, M_not, N_per_obj = CreateMidx(truth_idx, calc_m_not=True)
    N_per_obj = tf.cast(N_per_obj, dtype='float32')  # K x 1
    K = tf.cast(Msel.shape[0], dtype='float32')

    padmask_m = SelectWithDefault(Msel, tf.zeros_like(beta_in) + 1., 0)  # K x V-obj x 1
    x_m = SelectWithDefault(Msel, x, 0.)  # K x V-obj x C
    beta_m = SelectWithDefault(Msel, beta_in, 0.)  # K x V-obj x 1
    q_m = SelectWithDefault(Msel, q, 0.)  # K x V-obj x 1
    object_weights_m = SelectWithDefault(Msel, object_weights, 0.)
    distance_scale_m = SelectWithDefault(Msel, distance_scale, 1.)

    # condensation point per object: the member with the highest beta
    kalpha_m = tf.argmax(beta_m, axis=1)  # K x 1
    x_kalpha_m = tf.gather_nd(x_m, kalpha_m, batch_dims=1)  # K x C
    if use_mean_x > 0:
        # blend with the q-weighted mean position of the object
        x_kalpha_m_m = tf.reduce_sum(q_m * x_m * padmask_m, axis=1)  # K x C
        x_kalpha_m_m = tf.math.divide_no_nan(
            x_kalpha_m_m, tf.reduce_sum(q_m * padmask_m, axis=1) + 1e-9)
        x_kalpha_m = use_mean_x * x_kalpha_m_m + (1. - use_mean_x) * x_kalpha_m
    if kalpha_damping_strength > 0:
        x_kalpha_m = kalpha_damping_strength * tf.stop_gradient(x_kalpha_m) + (
            1. - kalpha_damping_strength) * x_kalpha_m

    q_kalpha_m = tf.gather_nd(q_m, kalpha_m, batch_dims=1)  # K x 1
    beta_kalpha_m = tf.gather_nd(beta_m, kalpha_m, batch_dims=1)  # K x 1
    object_weights_kalpha_m = tf.gather_nd(object_weights_m, kalpha_m,
                                           batch_dims=1)  # K x 1
    distance_scale_kalpha_m = tf.gather_nd(distance_scale_m, kalpha_m,
                                           batch_dims=1)  # K x 1
    distance_scale_kalpha_m_exp = tf.expand_dims(distance_scale_kalpha_m,
                                                 axis=2)  # K x 1 x 1

    # attractive potential
    distancesq_m = tf.reduce_sum((tf.expand_dims(x_kalpha_m, axis=1) - x_m)**2,
                                 axis=-1, keepdims=True)  # K x V-obj x 1
    distancesq_m *= distance_scale_kalpha_m_exp**2
    huberdistsq = huber(tf.sqrt(distancesq_m + 1e-5), d=4)  # acts at 4
    V_att = q_m * tf.expand_dims(q_kalpha_m, axis=1) * huberdistsq  # K x V-obj x 1
    V_att = V_att * tf.expand_dims(object_weights_kalpha_m, axis=1)  # K x V-obj x 1
    if weight_by_q:
        V_att = tf.math.divide_no_nan(tf.reduce_sum(padmask_m * V_att, axis=1),
                                      tf.reduce_sum(q_m, axis=1))  # K x 1
    else:
        V_att = tf.math.divide_no_nan(tf.reduce_sum(padmask_m * V_att, axis=1),
                                      N_per_obj + 1e-9)  # K x 1
    V_att = tf.math.divide_no_nan(tf.reduce_sum(V_att, axis=0), K + 1e-9)  # 1

    # optionally use a separate minimum charge for the repulsion
    q_rep = q
    if repulsion_q_min >= 0:
        q_rep = qraw + repulsion_q_min
        q_kalpha_m += repulsion_q_min - q_min

    # repulsive potential, the bit that needs M_not
    Mnot_distances = tf.expand_dims(x_kalpha_m, axis=1)  # K x 1 x C
    Mnot_distances = Mnot_distances - tf.expand_dims(x, axis=0)  # K x V x C
    if super_repulsion:
        sq_distance = tf.reduce_sum(Mnot_distances**2, axis=-1,
                                    keepdims=True)  # K x V x 1
        l_distance = tf.reduce_sum(tf.abs(Mnot_distances), axis=-1,
                                   keepdims=True)  # K x V x 1
        V_rep = 0.5 * (sq_distance + l_distance)
    else:
        V_rep = tf.reduce_sum(Mnot_distances**2, axis=-1,
                              keepdims=True)  # K x V x 1
    # same scaling as the attractive potential
    V_rep *= distance_scale_kalpha_m_exp**2  # K x V x 1
    V_rep = 1. / (V_rep + 0.1)
    V_rep *= M_not * tf.expand_dims(q_rep, axis=0)  # K x V x 1
    V_rep = tf.reduce_sum(V_rep, axis=1)  # K x 1
    V_rep *= object_weights_kalpha_m * q_kalpha_m  # K x 1
    if weight_by_q:
        sumq = tf.reduce_sum(M_not * tf.expand_dims(q_rep, axis=0), axis=1)
        V_rep = tf.math.divide_no_nan(V_rep, sumq)  # K x 1
    else:
        V_rep = tf.math.divide_no_nan(
            V_rep,
            tf.expand_dims(tf.expand_dims(N, axis=0), axis=0) - N_per_obj +
            1e-9)  # K x 1
    V_rep = tf.math.divide_no_nan(tf.reduce_sum(V_rep, axis=0), K + 1e-9)  # 1

    # beta terms
    B_pen = -tf.reduce_sum(padmask_m * 1. / (20. * distancesq_m + 1.), axis=1)  # K x 1
    B_pen += 1.  # remove self-interaction term (just for offset)
    B_pen *= object_weights_kalpha_m * beta_kalpha_m
    B_pen = tf.math.divide_no_nan(B_pen, N_per_obj + 1e-9)  # K x 1
    # now 'standard' 1-beta; another "-> 1, but slower" per object
    B_pen -= 0.2 * object_weights_kalpha_m * (
        tf.math.log(beta_kalpha_m + 1e-9))
    B_pen = tf.math.divide_no_nan(tf.reduce_sum(B_pen, axis=0), K + 1e-9)  # 1

    too_much_B_pen = tf.constant([0.], dtype='float32')

    # mean beta of the noise vertices
    Noise_pen = S_B * tf.math.divide_no_nan(tf.reduce_sum(is_noise * beta_in),
                                            tf.reduce_sum(is_noise))

    # explicit payload weight: too aggressive scaling is bad for high learning
    # rates, so this is simply beta squared, clipped to [1e-3, 10]
    p_w = padmask_m * tf.clip_by_value(beta_m**2, 1e-3, 10.)  # K x V_perobj x 1
    if payload_beta_gradient_damping_strength > 0:
        p_w = payload_beta_gradient_damping_strength * tf.stop_gradient(p_w) + \
            (1. - payload_beta_gradient_damping_strength) * p_w

    payload_loss_m = p_w * SelectWithDefault(
        Msel, (1. - is_noise) * payload_loss, 0.)  # K x V_perobj x P
    payload_loss_m = object_weights_kalpha_m * tf.reduce_sum(payload_loss_m,
                                                             axis=1)
    # normalise per object by the summed payload weights
    payload_loss_m = tf.math.divide_no_nan(payload_loss_m,
                                           tf.reduce_sum(p_w, axis=1))
    pll = tf.math.divide_no_nan(tf.reduce_sum(payload_loss_m, axis=0),
                                K + 1e-3)  # P

    return V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen
def SelectKnn(K: int,
              coords,
              row_splits,
              masking_values=None,
              threshold=0.5,
              tf_compatible=True,
              max_radius=-1.,
              mask_mode='none',
              mask_logic='xor'):
    '''
    Returns indices and distances**2; the gradient for distances is implemented.

    Masking: a vertex counts as masked if masking_values > threshold.
    The mode handed to the op is the sum of
       0) none = no masking
       1) acc  = get to have neighbours
       2) scat = get to be neighbours
    and
      10) xor: exclusive (one xor the other) -> exchange between collections,
               direction given by 1 and 2
      20) and: selected (one and the other) -> pooling (scat and acc don't matter)

    There is no gradient for the mask.
    '''
    assert mask_mode in ('none', 'acc', 'scat')
    assert mask_mode == 'none' or mask_logic in ('xor', 'and')

    if masking_values is None:
        # without masking values nothing can be masked
        assert mask_mode == 'none'
        masking_values = tf.zeros_like(coords[:, 0:1])

    no_mask = tf.zeros_like(masking_values, dtype='int32')
    mask = tf.where(masking_values > threshold, no_mask + 1, no_mask)

    # encode logic (tens) and mode (units) into one integer for the op
    logic_code = 10 if mask_logic == 'xor' else (20 if mask_logic == 'and' else 0)
    mode_code = 1 if mask_mode == 'acc' else (2 if mask_mode == 'scat' else 0)
    op_mask_mode = logic_code + mode_code

    idx, distsq = _sknn_op.SelectKnn(n_neighbours=K,
                                     tf_compatible=tf_compatible,
                                     max_radius=max_radius,
                                     coords=coords,
                                     row_splits=row_splits,
                                     mask=mask,
                                     mask_mode=op_mask_mode)

    # safe guards: self-reference first, indices within range, padding is -1
    guards = [
        tf.assert_equal(tf.range(tf.shape(idx)[0]), idx[:, 0]),
        tf.assert_less(idx, row_splits[-1]),
        tf.assert_less(-2, idx),
    ]
    with tf.control_dependencies(guards):
        if gl.knn_ops_use_tf_gradients:
            # recompute the squared distances in TensorFlow
            ncoords = SelectWithDefault(idx, coords, 0.)
            distsq = tf.reduce_sum((ncoords[:, 0:1, :] - ncoords)**2, axis=2)
            distsq = tf.where(idx < 0, 0., distsq)
        return idx, distsq
truth_idxs = tf.random.uniform((nvert,1), 0, 6, dtype='int32', seed=0) - 1 #for noise features = tf.random.uniform((nvert,1),seed=0) selidx,mnot,cperunique = CreateMidx(truth_idxs, calc_m_not=True) #just a small consistency check #print(truth_idxs) #print(selidx) #print(mnot) #print(cperunique) beta_m = SelectWithDefault(selidx, features, -1.) kalpha_m = tf.argmax(beta_m,axis=1) #print(beta_m, kalpha_m) #print(tf.gather_nd(beta_m,kalpha_m, batch_dims=1)) #now test the whole loss from object_condensation import oc_per_batch_element, oc_per_batch_element_old ''' oc_per_batch_element( beta, x, q_min,
def oc_per_batch_element(
        beta,
        x,
        q_min,
        object_weights,  # V x 1 !!
        truth_idx,
        is_spectator,
        payload_loss,
        S_B=1.,
        noise_q_min=None,
        distance_scale=None,
        payload_weight_function=None,  # receives betas as K x V x 1 as input, and a threshold val
        payload_weight_threshold=0.8,
        use_mean_x=0.,
        cont_beta_loss=False,
        prob_repulsion=False,
        phase_transition=False,
        phase_transition_double_weight=False,
        payload_beta_gradient_damping_strength=0.,
        kalpha_damping_strength=0.,
        beta_gradient_damping=0.,
        soft_q_scaling=True,
        weight_by_q=False,
        repulsion_q_min=-1.,
        super_repulsion=False,
        super_attraction=False,
        div_repulsion=False,
        soft_att=True,
        dynamic_payload_scaling_onset=-0.03):
    '''
    Object-condensation loss terms for one batch element.

    All inputs are V x X, where X can be 1. Negative truth_idx marks noise;
    beta and is_spectator are asserted to be non-negative.

    Not implemented (raise ValueError): prob_repulsion,
    phase_transition_double_weight, payload_weight_function and
    repulsion_q_min >= 0. payload_weight_threshold is not used.

    distance_scale is the per-vertex distance scale; distances to the
    condensation points are divided by 2 * scale**2. None is treated as a
    unit scale (it used to raise a TypeError).

    Returns (V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen). If the
    element contains no objects, a warning is printed and zeros are returned.
    '''
    tf.assert_equal(True, is_spectator >= 0.)
    tf.assert_equal(True, beta >= 0.)

    if prob_repulsion:
        raise ValueError("prob_repulsion not implemented")
    if phase_transition_double_weight:
        raise ValueError("phase_transition_double_weight not implemented")
    if payload_weight_function is not None:
        raise ValueError("payload_weight_function not implemented")
    # Checked up front; the branch previously raised after V_att was built and
    # carried unreachable statements behind the raise.
    if repulsion_q_min >= 0:
        raise ValueError("repulsion_q_min >= 0: spectators TBI")

    if beta_gradient_damping > 0.:
        beta = beta_gradient_damping * tf.stop_gradient(beta) + (
            1. - beta_gradient_damping) * beta
    beta_in = beta
    beta = tf.clip_by_value(beta, 0., 1. - 1e-4)

    # spectators get no minimum charge
    q_min *= (1. - is_spectator)

    qraw = tf.math.atanh(beta)**2
    if soft_q_scaling:
        qraw = tf.math.atanh(beta_in / 1.002)**2

    is_noise = tf.where(truth_idx < 0,
                        tf.zeros_like(truth_idx, dtype='float32') + 1.,
                        0.)  # V x 1
    if noise_q_min is not None:
        q_min = (1. - is_noise) * q_min + is_noise * noise_q_min
    # just safety in case there are some numerical effects
    q_min = tf.where(q_min < 0, 0., q_min)

    q = qraw + q_min  # V x 1

    N = tf.cast(beta.shape[0], dtype='float32')

    Msel, M_not, N_per_obj = CreateMidx(truth_idx, calc_m_not=True)
    if Msel is None:
        print(
            '>>> WARNING: Event has no objects, only noise! Will return zero loss. <<<'
        )
        zero_tensor = tf.reduce_mean(q, axis=0) * 0.
        zero_payload = tf.reduce_mean(payload_loss, axis=0) * 0.
        # V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen
        return zero_tensor, zero_tensor, zero_tensor, zero_tensor, zero_payload, zero_tensor

    N_per_obj = tf.cast(N_per_obj, dtype='float32')  # K x 1
    K = tf.cast(Msel.shape[0], dtype='float32')

    # sanity check only, none of this enters the loss: every vertex selected
    # for an object must carry that object's truth index
    truth_m = SelectWithDefault(Msel, truth_idx, -2)  # K x V-obj x 1
    truth_same = truth_m[:, 0:1] == truth_m
    truth_same = tf.where(truth_m == -2, True, truth_same)
    tf.assert_equal(
        tf.reduce_all(truth_same),
        True,
        message="truth indices do not match object selection, serious bug")

    padmask_m = SelectWithDefault(Msel, tf.zeros_like(beta_in) + 1., 0.)  # K x V-obj x 1
    x_m = SelectWithDefault(Msel, x, 0.)  # K x V-obj x C
    beta_m = SelectWithDefault(Msel, beta, 0.)  # K x V-obj x 1
    is_spectator_m = SelectWithDefault(Msel, is_spectator, 0.)  # K x V-obj x 1
    q_m = SelectWithDefault(Msel, q, 0.)  # K x V-obj x 1
    object_weights_m = SelectWithDefault(Msel, object_weights, 0.)

    if distance_scale is None:
        # The default used to crash in the addition below.
        distance_scale = tf.ones_like(beta_in)
    distance_scale = distance_scale + 1e-3
    distance_scale_m = SelectWithDefault(Msel, distance_scale, 1.)
    tf.assert_greater(distance_scale_m,
                      0.,
                      message="predicted distances must be greater zero")

    # condensation point per object: highest-beta member that is no spectator
    kalpha_m = tf.argmax((1. - is_spectator_m) * beta_m, axis=1)  # K x 1
    x_kalpha_m = tf.gather_nd(x_m, kalpha_m, batch_dims=1)  # K x C
    if use_mean_x > 0:
        # blend with the (beta * q)-weighted mean position of the object
        x_kalpha_m_m = tf.reduce_sum(beta_m * q_m * x_m * padmask_m,
                                     axis=1)  # K x C
        x_kalpha_m_m = tf.math.divide_no_nan(
            x_kalpha_m_m,
            tf.reduce_sum(beta_m * q_m * padmask_m, axis=1) + 1e-9)
        x_kalpha_m = use_mean_x * x_kalpha_m_m + (1. - use_mean_x) * x_kalpha_m
    if kalpha_damping_strength > 0:
        x_kalpha_m = kalpha_damping_strength * tf.stop_gradient(x_kalpha_m) + (
            1. - kalpha_damping_strength) * x_kalpha_m

    q_kalpha_m = tf.gather_nd(q_m, kalpha_m, batch_dims=1)  # K x 1
    beta_kalpha_m = tf.gather_nd(beta_m, kalpha_m, batch_dims=1)  # K x 1
    object_weights_kalpha_m = tf.gather_nd(object_weights_m, kalpha_m,
                                           batch_dims=1)  # K x 1

    # the distance scale is a beta-weighted mean so that there is more than
    # one impact per object
    distance_scale_kalpha_m = tf.math.divide_no_nan(
        tf.reduce_sum(distance_scale_m * beta_m * padmask_m, axis=1),
        tf.reduce_sum(beta_m * padmask_m, axis=1) + 1e-3) + 1e-3  # K x 1
    distance_scale_kalpha_m_exp = tf.expand_dims(distance_scale_kalpha_m,
                                                 axis=2)  # K x 1 x 1

    # attractive potential
    distancesq_m = tf.reduce_sum((tf.expand_dims(x_kalpha_m, axis=1) - x_m)**2,
                                 axis=-1, keepdims=True)  # K x V-obj x 1
    distancesq_m = tf.math.divide_no_nan(
        distancesq_m, 2. * distance_scale_kalpha_m_exp**2 + 1e-6)
    absdist = tf.sqrt(distancesq_m + 1e-6)
    huberdistsq = huber(absdist, d=4)  # acts at 4
    if super_attraction:
        huberdistsq += 1. - tf.math.exp(-100. * absdist)

    V_att = q_m * tf.expand_dims(q_kalpha_m, axis=1) * huberdistsq  # K x V-obj x 1
    if soft_att:
        # replaces the huber form; note that q_kalpha_m does not enter here
        V_att = q_m * tf.math.log(tf.math.exp(1.) * distancesq_m + 1.)
    V_att = V_att * tf.expand_dims(object_weights_kalpha_m, axis=1)  # K x V-obj x 1
    if weight_by_q:
        V_att = tf.math.divide_no_nan(tf.reduce_sum(padmask_m * V_att, axis=1),
                                      tf.reduce_sum(q_m, axis=1))  # K x 1
    else:
        V_att = tf.math.divide_no_nan(tf.reduce_sum(padmask_m * V_att, axis=1),
                                      N_per_obj + 1e-9)  # K x 1
    V_att_K = V_att  # per object, optionally used in the payload scaling
    V_att = tf.math.divide_no_nan(tf.reduce_sum(V_att, axis=0), K + 1e-9)  # 1

    q_rep = q

    # repulsive potential, the bit that needs M_not
    Mnot_distances = tf.expand_dims(x_kalpha_m, axis=1)  # K x 1 x C
    Mnot_distances = Mnot_distances - tf.expand_dims(x, axis=0)  # K x V x C
    rep_distances = tf.reduce_sum(Mnot_distances**2, axis=-1,
                                  keepdims=True)  # K x V x 1
    rep_distances = tf.math.divide_no_nan(
        rep_distances, 2. * distance_scale_kalpha_m_exp**2 + 1e-6)

    V_rep = tf.math.exp(-rep_distances)
    if super_repulsion:
        V_rep += 10. * tf.math.exp(-100. * tf.sqrt(rep_distances + 1e-6))
    if div_repulsion:
        V_rep = 1. / (rep_distances + 0.1)

    # spectator weights are in q
    V_rep *= M_not * tf.expand_dims(q_rep, axis=0)  # K x V x 1
    V_rep = tf.reduce_sum(V_rep, axis=1)  # K x 1
    V_rep *= object_weights_kalpha_m * q_kalpha_m  # K x 1
    if weight_by_q:
        sumq = tf.reduce_sum(M_not * tf.expand_dims(q_rep, axis=0), axis=1)
        V_rep = tf.math.divide_no_nan(V_rep, sumq)  # K x 1
    else:
        V_rep = tf.math.divide_no_nan(
            V_rep,
            tf.expand_dims(tf.expand_dims(N, axis=0), axis=0) - N_per_obj +
            1e-9)  # K x 1
    V_rep_K = V_rep  # per object, optionally used in the payload scaling
    V_rep = tf.math.divide_no_nan(tf.reduce_sum(V_rep, axis=0), K + 1e-9)  # 1

    B_pen = None

    def bpenhelp(b_m, exponent: int):
        # log((1 - norm)^2 + 1) of the per-object sum (exponent 1) or
        # p-norm (otherwise) of the betas
        b_mes = tf.reduce_sum(b_m**exponent, axis=1)
        if not exponent == 1:
            b_mes = (b_mes + 1e-16)**(1. / float(exponent))
        return tf.math.log((1. - b_mes)**2 + 1. + 1e-8)

    if phase_transition:
        # beta terms
        B_pen = -tf.reduce_sum(padmask_m * 1. / (20. * distancesq_m + 1.),
                               axis=1)  # K x 1
        B_pen += 1.  # remove self-interaction term (just for offset)
        B_pen *= object_weights_kalpha_m * beta_kalpha_m
        B_pen = tf.math.divide_no_nan(B_pen, N_per_obj + 1e-9)  # K x 1
        # now 'standard' 1-beta; another "-> 1, but slower" per object
        B_pen -= 0.2 * object_weights_kalpha_m * (
            tf.math.log(beta_kalpha_m + 1e-9))
        B_pen = tf.math.divide_no_nan(tf.reduce_sum(B_pen, axis=0),
                                      K + 1e-9)  # 1
    else:
        B_pen_po = object_weights_kalpha_m * (1. - beta_kalpha_m)
        B_pen = tf.math.divide_no_nan(tf.reduce_sum(B_pen_po, axis=0),
                                      K + 1e-9)  # 1
        # get out of random gradients in the beginning: introduces gradients
        # on all betas of hits rather than just the max one
        B_up = tf.math.divide_no_nan(
            tf.reduce_sum((1. - is_noise) * (1. - beta_in)),
            N - tf.reduce_sum(is_noise))
        B_pen += 0.01 * B_pen * B_up  # if it's high try to elevate all betas

    if cont_beta_loss:
        B_pen = bpenhelp(beta_m, 2) + bpenhelp(beta_m, 4)
        B_pen = tf.math.divide_no_nan(
            tf.reduce_sum(object_weights_kalpha_m * B_pen, axis=0), K + 1e-9)

    too_much_B_pen = object_weights_kalpha_m * bpenhelp(
        beta_m, 1)  # K x 1, don't make it steep
    too_much_B_pen = tf.math.divide_no_nan(tf.reduce_sum(too_much_B_pen),
                                           K + 1e-9)

    # mean beta of the noise vertices
    Noise_pen = S_B * tf.math.divide_no_nan(tf.reduce_sum(is_noise * beta_in),
                                            tf.reduce_sum(is_noise) + 1e-3)

    # explicit payload weight; too aggressive scaling is bad for high
    # learning rates, this form is well behaved
    p_w = padmask_m * tf.math.atanh(beta_m / 1.002)**2
    if payload_beta_gradient_damping_strength > 0:
        p_w = payload_beta_gradient_damping_strength * tf.stop_gradient(p_w) + \
            (1. - payload_beta_gradient_damping_strength) * p_w

    payload_loss_m = p_w * SelectWithDefault(
        Msel, (1. - is_noise) * payload_loss, 0.)  # K x V_perobj x P
    payload_loss_m = object_weights_kalpha_m * tf.reduce_sum(payload_loss_m,
                                                             axis=1)  # K x P
    # normalisation per object
    payload_loss_m = tf.math.divide_no_nan(payload_loss_m,
                                           tf.reduce_sum(p_w, axis=1))

    if dynamic_payload_scaling_onset > 0:
        # scale the payload loss down for objects whose potentials are still
        # high; no gradient through the scaler
        V_scaler = tf.stop_gradient(V_rep_K + V_att_K)  # K x 1
        scaling = tf.exp(-tf.math.log(2.) * V_scaler /
                         (dynamic_payload_scaling_onset / 5.))
        payload_loss_m *= scaling  # basically the onset of the rise

    pll = tf.math.divide_no_nan(tf.reduce_sum(payload_loss_m, axis=0),
                                K + 1e-3)  # P
    return V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen