def generate_toy(config, N=1000, force=True, max_N=100000):
    """Generate at least N toy events by accept/reject sampling over phase space;
    if force is True, the merged sample is truncated to exactly N events."""
    decay_group = config.get_decay()
    amp = config.get_amplitude()

    def gen(M):
        return generate_phsp(config, M)

    all_data = []
    n_gen = 0
    n_accept = 0
    n_total = 0
    test_N = 10 * N
    while N > n_accept:
        test_N = abs(min(max_N, test_N))
        data = single_sampling(gen, amp, test_N)
        n_gen = data_shape(data)
        n_total += test_N
        n_accept += n_gen
        # estimate how many more phase-space events are needed from the
        # acceptance rate observed so far (with a 1% safety margin)
        test_N = int(1.01 * n_total / (n_accept + 1) * (N - n_accept))
        all_data.append(data)
    ret = data_merge(*all_data)
    if force:
        cut = tf.range(data_shape(ret)) < N
        ret = data_mask(ret, cut)
    return ret

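# Illustrative sketch (not part of TFPWA): the same adaptive accept/reject
# strategy as generate_toy, applied to a hypothetical 1-D density so it runs
# stand-alone. The acceptance rate seen so far is used to estimate, with a 1%
# safety margin, how many more proposals are needed to reach the target size.
import numpy as np


def toy_accept_reject(n_target, density=lambda x: np.exp(-0.5 * x**2), max_n=100000):
    rng = np.random.default_rng(0)
    accepted = []
    n_total = 0
    test_n = 10 * n_target
    while sum(len(a) for a in accepted) < n_target:
        test_n = min(max_n, abs(test_n))
        x = rng.uniform(-5, 5, test_n)                  # flat "phase space" proposals
        keep = rng.uniform(0, 1, test_n) < density(x)   # density is bounded by 1 here
        accepted.append(x[keep])
        n_total += test_n
        n_accept = sum(len(a) for a in accepted)
        test_n = int(1.01 * n_total / (n_accept + 1) * (n_target - n_accept))
    # "force" behaviour: truncate the merged sample to exactly n_target events
    return np.concatenate(accepted)[:n_target]
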
def _get_bg_weight(self, data=None, bg=None, display=True):
    w_bkg = self.config["data"].get("bg_weight", 0.0)
    if not isinstance(w_bkg, list):
        w_bkg = [w_bkg] * self._Ngroup
    assert len(w_bkg) == self._Ngroup
    w_inmc = self.config["data"].get("inject_ratio", 0.0)
    if not isinstance(w_inmc, list):
        w_inmc = [w_inmc] * self._Ngroup
    assert len(w_inmc) == self._Ngroup
    # optionally rescale the background weight by the data/sideband yield ratio
    weight_scale = self.config["data"].get("weight_scale", False)
    if weight_scale:
        data = data if data is not None else self.get_data("data")
        bg = bg if bg is not None else self.get_data("bg")
        tmp = []
        for wb, dt, sb in zip(w_bkg, data, bg):
            if isinstance(wb, str):
                wb = self.data.load_weight_file(wb)
            tmp.append(wb * data_shape(dt) / data_shape(sb))
        w_bkg = tmp
        if display:
            print("background weight:", w_bkg)
    else:
        tmp = []
        for wb in w_bkg:
            if isinstance(wb, str):
                wb = self.data.load_weight_file(wb)
            tmp.append(wb)
        w_bkg = tmp
    return w_bkg, w_inmc

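# Worked example (made-up numbers) of the weight_scale branch above: a
# configured per-group bg_weight is rescaled by the data/sideband event ratio.
wb_example, n_data_example, n_bg_example = 0.3, 10000, 2000
w_eff_example = wb_example * n_data_example / n_bg_example  # 1.5 assigned per sideband event
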
def get_n_data(self):
    data = self.get_data("data")
    weight = [
        data_i.get("weight", np.ones((data_shape(data_i),)))
        for data_i in data
    ]
    return [np.sum(weight_i) for weight_i in weight]

def process_scale(self, idx, data):
    if idx in self.scale_list and self.dic.get("weight_scale", False):
        n_bg = data_shape(data)
        scale_factor = self.get_n_data() / n_bg
        data["weight"] = (
            data.get("weight", np.ones((n_bg,))) * scale_factor
        )
    return data

def load_data(self, files, weights=None, weights_sign=1, charge=None) -> dict:
    if files is None:
        return None
    order = self.get_dat_order()
    p_list = self.get_particle_p()
    center_mass = self.dic.get("center_mass", True)
    r_boost = self.dic.get("r_boost", False)
    random_z = self.dic.get("random_z", False)
    npz_data = np.load(files)
    p = {
        get_particle(str(v)): npz_data[str(k)]
        for k, v in zip(p_list, order)
    }
    data = cal_angle_from_momentum(
        p,
        self.decay_struct,
        center_mass=center_mass,
        r_boost=r_boost,
        random_z=random_z,
    )
    if "weight" in npz_data:
        data["weight"] = npz_data["weight"]
    if "charge_conjugation" in npz_data:
        data["charge_conjugation"] = npz_data["charge_conjugation"]
    else:
        data["charge_conjugation"] = np.ones((data_shape(data),))
    return data

def _amp2s(data, cached_data):
    # ``dg`` is expected to be available from the enclosing scope
    n_data = data_shape(data)
    pv = build_params_vector(dg, data)
    ret = []
    for i, j in zip(pv, cached_data):
        # broadcast the parameter vector against the cached angular tensors
        a = tf.reshape(i, [n_data, -1] + [1] * (len(j[0].shape) - 1))
        ret.append(tf.reduce_sum(a * tf.stack(j, axis=1), axis=1))
    amp = tf.reduce_sum(ret, axis=0)
    amp2s = tf.math.real(amp * tf.math.conj(amp))
    return tf.reduce_sum(amp2s, list(range(1, len(amp2s.shape))))

def cal_signal_yields(self, params={}, mcdata=None, batch=25000):
    if hasattr(params, "params"):
        params = getattr(params, "params")
    if mcdata is None:
        mcdata = self.get_data("phsp")
    amp = self.get_amplitude()
    fracs = [
        fit_fractions(amp, i, self.inv_he, params, batch) for i in mcdata
    ]
    data = self.get_data("data")
    bg = self.get_data("bg")
    if bg is None:
        N_total = []
        for i in data:
            N_data = data_shape(i)
            N_total.append((N_data, np.sqrt(N_data)))
    else:
        bg_weight, _ = self._get_bg_weight(data, bg)
        N_total = []
        for i, j, w in zip(data, bg, bg_weight):
            N_data = data_shape(i)
            N_bg = data_shape(j)
            N_total.append(
                (N_data - w * N_bg, np.sqrt(N_data + w * w * N_bg))
            )
    N_sig_s = []
    for frac_e, N_e in zip(fracs, N_total):
        frac, frac_err = frac_e
        N, N_err = N_e
        N_sig = {}
        for i in frac:
            N_sig[i] = (
                frac[i] * N,
                np.sqrt((N * frac_err.get(i, 0.0)) ** 2 + (N_err * frac[i]) ** 2),
            )
        N_sig_s.append(N_sig)
    return N_sig_s

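# Worked example (purely illustrative numbers, not from any fit) of the error
# propagation used above:
#   N_sig = frac * N,   sigma = sqrt((N * sigma_frac)**2 + (sigma_N * frac)**2)
import numpy as np

frac_ex, frac_err_ex = 0.25, 0.02           # hypothetical fit fraction of one component
N_ex, N_err_ex = 5000.0, np.sqrt(5000.0)    # hypothetical background-subtracted yield
n_sig_ex = frac_ex * N_ex                                                    # 1250.0
n_sig_err_ex = np.sqrt((N_ex * frac_err_ex) ** 2 + (N_err_ex * frac_ex) ** 2)  # ~101.5
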
def build_params_vector(dg, data):
    n_data = data_shape(data)
    m_dep = dg.get_m_dep(data)
    ret = []
    for i in m_dep:
        tmp = i[0]
        if tmp.shape[0] == 1:
            tmp = tf.tile(tmp, [n_data] + [1] * (len(tmp.shape) - 1))
        tmp = tf.reshape(tmp, (n_data, -1))
        for j in i[1:]:
            # flatten the event-wise outer product of the factors along each chain
            tmp2 = tf.reshape(j, (j.shape[0], -1))
            tmp = tf.reshape(tmp[:, :, None] * tmp2[:, None, :], (n_data, -1))
        ret.append(tmp)
    return ret

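# Shape sketch (dummy tensors, not TFPWA objects): how build_params_vector
# flattens the per-event outer product of two factors. A factor of shape
# (n_data, a) combined with one of shape (n_data, b) yields (n_data, a * b).
import tensorflow as tf

n_data_demo = 4
f1 = tf.reshape(tf.range(n_data_demo * 2, dtype=tf.float64), (n_data_demo, 2))   # (4, 2)
f2 = tf.reshape(tf.range(n_data_demo * 3, dtype=tf.float64), (n_data_demo, 3))   # (4, 3)
flat = tf.reshape(f1[:, :, None] * f2[:, None, :], (n_data_demo, -1))
assert flat.shape == (n_data_demo, 2 * 3)
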
def load_data(
    self, files, weights=None, weights_sign=1, charge=None
) -> dict:
    if files is None:
        return None
    order = self.get_dat_order()
    center_mass = self.dic.get("center_mass", True)
    r_boost = self.dic.get("r_boost", False)
    random_z = self.dic.get("random_z", False)
    data = prepare_data_from_decay(
        files,
        self.decay_struct,
        order,
        center_mass=center_mass,
        r_boost=r_boost,
        random_z=random_z,
    )
    if weights is not None:
        if isinstance(weights, float):
            data["weight"] = np.array(
                [weights * weights_sign] * data_shape(data)
            )
        elif isinstance(weights, str):  # weight files
            weight = self.load_weight_file(weights)
            data["weight"] = weight[: data_shape(data)] * weights_sign
        else:
            raise TypeError(
                "weight format error: {}".format(type(weights))
            )
    if charge is not None:
        charges = self.load_weight_file(charge)
        data["charge_conjugation"] = charges[: data_shape(data)]
    else:
        data["charge_conjugation"] = np.ones((data_shape(data),))
    return data

def reweight_init_value(amp, phsp, ns=None):
    """Reset the total factor of each decay chain so that its weighted
    integration over the phase-space sample equals ``ns``."""
    total = [i.total for i in amp.decay_group]
    n_phsp = data_shape(phsp)
    weight = np.array(phsp.get("weight", [1] * n_phsp))
    sw = np.sum(weight)
    if ns is None:
        ns = [1] * len(total)
    elif isinstance(ns, (int, float)):
        ns = [ns / len(total)] * len(total)
    for i in total:
        i.set_rho(1.0)
    pw = amp.partial_weight(phsp)
    for i, w, ni in zip(total, pw, ns):
        i.set_rho(np.sqrt(ni / np.sum(weight * w) * sw))

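# Numerical check of the scaling rule above (toy numbers): with
# rho = sqrt(ni / sum(weight * w) * sw), the weighted average of rho**2 * w,
# i.e. sum(weight * rho**2 * w) / sw, comes out equal to the requested ni.
import numpy as np

weight_toy = np.array([1.0, 0.5, 2.0, 1.5])   # hypothetical phase-space weights
w_toy = np.array([0.2, 0.4, 0.1, 0.3])        # hypothetical partial weight of one chain
sw_toy = np.sum(weight_toy)
ni_toy = 100.0
rho_toy = np.sqrt(ni_toy / np.sum(weight_toy * w_toy) * sw_toy)
assert np.isclose(np.sum(weight_toy * rho_toy**2 * w_toy) / sw_toy, ni_toy)
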
def cached_amp(dg, data, matrix_method=build_angle_amp_matrix):
    idx, c_amp = matrix_method(dg, data)
    n_data = data_shape(data)

    @tf.function
    def _amp():
        pv = build_params_vector(dg, data)
        ret = []
        for i, j in zip(pv, c_amp):
            a = tf.reshape(i, [n_data, -1] + [1] * (len(j[0].shape) - 1))
            ret.append(tf.reduce_sum(a * tf.stack(j, axis=1), axis=1))
        amp = tf.reduce_sum(ret, axis=0)
        return amp

    return _amp

def fit(
    self,
    data=None,
    phsp=None,
    bg=None,
    inmc=None,
    batch=65000,
    method="BFGS",
    check_grad=False,
    improve=False,
    reweight=False,
    maxiter=None,
):
    if data is None and phsp is None:
        data, phsp, bg, inmc = self.get_all_data()
        fcn = self.get_fcn(batch=batch)
    else:
        fcn = self.get_fcn([data, phsp, bg, inmc], batch=batch)
    amp = self.get_amplitude()
    print("decay chains included: ")
    for i in self.full_decay:
        ls_list = [getattr(j, "get_ls_list", lambda: None)() for j in i]
        print(" ", i, " ls: ", *ls_list)
    if reweight:
        ConfigLoader.reweight_init_value(
            amp, phsp[0], ns=data_shape(data[0])
        )
    print("\n########### initial parameters")
    print(json.dumps(amp.get_params(), indent=2), flush=True)
    print("initial NLL: ", fcn({}))
    # fit configure
    self.fit_params = fit(
        fcn=fcn,
        method=method,
        bounds_dict=self.bound_dic,
        check_grad=check_grad,
        improve=False,
        maxiter=maxiter,
    )
    if self.fit_params.hess_inv is not None:
        self.inv_he = self.fit_params.hess_inv
    return self.fit_params

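# A minimal driver for the fit entry point above, assuming a config.yml and
# data files prepared as in the TFPWA examples (file names are placeholders):
from tf_pwa.config_loader import ConfigLoader

config = ConfigLoader("config.yml")
fit_result = config.fit(batch=65000, method="BFGS")
errors = config.get_params_error(fit_result)  # error estimate from the stored inverse Hessian
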
def sum_nll_grad_bacth(self, data):
    data_id = id(data)
    data = list(data)
    weight = [i.get("weight", tf.ones((data_shape(i),))) for i in data]
    if data_id not in self.cached_data:
        # cache the angular amplitude matrix once per data group
        self.cached_data[data_id] = [
            build_amp.build_angle_amp_matrix(self.Amp.decay_group, i)[1]
            for i in data
        ]
    ln_data, g_ln_data = sum_gradient_data2(
        self.cached_amp,
        self.Amp.trainable_variables,
        data,
        self.cached_data[data_id],
        weight=weight,
        trans=clip_log,
    )
    return -ln_data, [-i for i in g_ln_data]

def get_n_data(self):
    data = self.get_data("data")
    weight = data.get("weight", np.ones((data_shape(data),)))
    return np.sum(weight)

def nll_grad_hessian(
    self, data, mcdata, weight=1.0, batch=24000, bg=None, mc_weight=1.0
):
    """
    The parameters are the same as in ``self.nll()``, but the Hessian is returned as well.

    :return NLL: Real number. The value of NLL.
    :return gradients: List of real numbers. The gradients for each variable.
    :return Hessian: 2-D Array of real numbers. The Hessian matrix of the variables.
    """
    data, weight = self.get_weight_data(data, weight, bg=bg)
    if isinstance(mc_weight, float):
        mc_weight = tf.convert_to_tensor(
            [mc_weight] * data_shape(mcdata), dtype="float64"
        )
    n_mc = tf.reduce_sum(mc_weight)
    sw = tf.reduce_sum(weight)
    ln_data, g_ln_data, h_ln_data = sum_hessian(
        self.Amp,
        split_generator(data, batch),
        self.Amp.trainable_variables,
        weight=split_generator(weight, batch),
        trans=clip_log,
    )
    # the normalization integral and its derivatives come from the cached
    # integral instead of a direct sum_hessian over mcdata
    mc_weight = mc_weight / tf.reduce_sum(mc_weight)
    mc_id = id(mcdata)
    if mc_id not in self.cached_int:
        self.build_cached_int(mcdata, mc_weight)
    with tf.GradientTape(persistent=True) as tape0:
        with tf.GradientTape() as tape:
            y_i = self.get_cached_int(mc_id)
        g_i = tape.gradient(
            y_i, self.Amp.trainable_variables, unconnected_gradients="zero"
        )
    h_s_i = []
    for gi in g_i:  # 2nd order derivative
        h_s_i.append(
            tape0.gradient(
                gi,
                self.Amp.trainable_variables,
                unconnected_gradients="zero",
            )
        )
    del tape0
    int_mc = y_i
    g_int_mc = tf.convert_to_tensor(g_i)
    h_int_mc = tf.convert_to_tensor(h_s_i)
    n_var = len(g_ln_data)
    nll = -ln_data + sw * tf.math.log(int_mc / n_mc)
    g = -g_ln_data + sw * g_int_mc / int_mc
    g_int_mc = g_int_mc / int_mc
    g_outer = tf.reshape(g_int_mc, (-1, 1)) * tf.reshape(g_int_mc, (1, -1))
    h = -h_ln_data - sw * g_outer + sw / int_mc * h_int_mc
    return nll, g, h

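# Minimal sketch of the nested-tape trick used above for second derivatives:
# an outer persistent tape records the first-order gradient, which is then
# differentiated once per variable to build the Hessian rows (toy function,
# not the amplitude model).
import tensorflow as tf

x = tf.Variable(1.5, dtype=tf.float64)
y = tf.Variable(-0.5, dtype=tf.float64)
variables = [x, y]

with tf.GradientTape(persistent=True) as tape0:
    with tf.GradientTape() as tape:
        f = x**2 * y + tf.sin(y)
    grads = tape.gradient(f, variables)

hessian_rows = [
    tape0.gradient(g, variables, unconnected_gradients="zero") for g in grads
]
del tape0
# hessian_rows[0] = [d2f/dx2, d2f/dxdy] = [2*y, 2*x] = [-1.0, 3.0]
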
def test_generate_phsp(toy_config):
    data = toy_config.generate_toy(1000, force=False)
    assert data_shape(data) >= 1000
    data = toy_config.generate_toy(1000)
    assert data_shape(data) == 1000