def _prepare(self, ids, is_train=True, validation=False, cache=True):
    """Build, or load from cache, the per-run time/energy count matrices.

    Each run's CSV is read without a header.  Column 0 is scaled by 1e-6 and
    cumulatively summed into an absolute event time (presumably microsecond
    gaps converted to seconds -- confirm against the data spec); column 1 is
    histogrammed into ``EBINS`` equal-width bins over ``[0, MAX_ENERGY]``.
    For every time scale in ``TSCALE_LIST`` the run is cut into slices of
    width ``1 / tscale`` and one histogram row is produced per slice.

    Args:
        ids: Iterable of integer run ids; run ``n`` is read from ``n.csv``.
        is_train: Read from the train directory when true, else from the
            test directory.  Also selects the cache file name.
        validation: When true the cache file is named ``validation_nmf``;
            the input directory is still chosen by ``is_train``.
        cache: When true, pickle the freshly built result to the cache path.

    Returns:
        A list of ``(tmax, EBINS)`` arrays ordered run-major, i.e. the
        matrix for run position ``r`` and scale position ``s`` is at index
        ``r * len(TSCALE_LIST) + s``.  Rows before ``int(TOFFS * tscale)``
        are left at zero.  If the cache file already exists its unpickled
        content is returned unchanged and ``ids`` is not consulted.
    """
    fname = "train_nmf" if is_train else "test_nmf"
    if validation:
        fname = "validation_nmf"
    filename = "%s%s.pkl" % (fname, "_smooth" if self.smooth_signal else "")
    tcache_path = os.path.join(self._base_path, filename)
    if os.path.exists(tcache_path):
        # NOTE: unpickling is only safe because this cache is written by the
        # code below; never point _base_path at untrusted files.
        with open(tcache_path, "rb") as fd:
            return pkl.load(fd)

    tpath = self._train_dir_path if is_train else self._test_dir_path
    ebins = np.linspace(0, MAX_ENERGY, EBINS + 1)  # EBINS + 1 edges -> EBINS counts
    ret = []
    for run_id in tqdm(ids, desc=fname):
        # The event list does not depend on the time scale, so parse and
        # accumulate it once per run instead of once per scale.
        dat = pd.read_csv(os.path.join(tpath, "%d.csv" % run_id), header=None)
        d1 = np.cumsum(dat[0] * 1e-6)
        d2 = dat[1]
        t_end = d1.values[-1]
        for tscale in TSCALE_LIST:
            invtscale = 1 / tscale
            tmax = int(t_end * tscale)
            zmat = np.zeros((tmax, EBINS))
            for i in range(int(TOFFS * tscale), tmax):
                # Both comparisons are strict, so an event lying exactly on
                # a slice boundary is counted in neither neighbouring slice.
                dind = np.argwhere(
                    (d1 > i * invtscale) & (d1 < (i + 1) * invtscale)
                ).flatten()
                d3 = d2[dind]
                hist = np.histogram(d3, bins=ebins)[0]
                if self.smooth_signal:
                    hist = denoise_signal(hist)
                zmat[i, :] = hist
            ret.append(zmat)

    if cache:
        with open(tcache_path, "wb") as fd:
            pkl.dump(ret, fd)
    return ret
def __init__(self, base_path):
    """Initialise the base algorithm and derive per-source lookup tables.

    After delegating to ``AlgRadMMBase.__init__`` this builds:

    * ``source_hist`` -- a ``(10, EBINS)`` array with one row per
      (shielding in 0..1, source in 0..4) pair, stored at row
      ``shielding * 5 + source``.  Each entry is the mean ``CountRate`` of
      the ``_source_data`` rows whose ``PhotonEnergy`` lies strictly inside
      that energy bin; every row is then divided by its own maximum.
    * ``bin_map_arr`` -- for every entry of ``SOURCE_METADATA``, the list of
      bin indices covered by its ``(from, to)`` ranges.
    * ``weigh_bin_map_arr`` -- smallest bin-list size divided by each
      entry's bin-list size.
    * ``weigh_thresh_arr`` -- each entry's bin-list size divided by
      ``EBINS``.

    Args:
        base_path: Forwarded unchanged to ``AlgRadMMBase.__init__``.
    """
    alg_radmm_base.AlgRadMMBase.__init__(self, base_path)
    self.smooth_signal = False

    source_table = self._source_data
    # Edge k sits at k / EBINS * MAX_ENERGY; consecutive edges bound one bin.
    bin_edges = [k / EBINS * MAX_ENERGY for k in range(EBINS + 1)]

    self.source_hist = np.zeros((10, EBINS))
    for shielding in range(2):
        for source in range(5):
            # SourceID values in the table are 1-based.
            pair_mask = ((source_table["Shielding"] == shielding)
                         & (source_table["SourceID"] == source + 1))
            mean_rates = []
            for lower, upper in zip(bin_edges[:-1], bin_edges[1:]):
                in_bin = source_table[pair_mask
                                      & (source_table["PhotonEnergy"] > lower)
                                      & (source_table["PhotonEnergy"] < upper)]
                mean_rates.append(in_bin["CountRate"].mean())

            spectrum = np.array(mean_rates)
            if self.smooth_signal:
                spectrum = denoise_signal(spectrum)

            row = shielding * 5 + source
            self.source_hist[row, :] = np.abs(spectrum)
            self.source_hist[row, :] /= np.max(self.source_hist[row, :])

    # Width of one bin in energy units (keV, going by the variable name).
    kev_per_bin = int(MAX_ENERGY / EBINS)
    self.bin_map_arr = []
    for meta_idx in range(len(SOURCE_METADATA)):
        # A dict is used as an insertion-ordered set of bin indices.
        covered = {}
        for window in SOURCE_METADATA[meta_idx]:
            first_bin = _rdown(window[0], kev_per_bin)
            last_bin = _rup(window[1], kev_per_bin)
            covered.update(dict.fromkeys(range(first_bin, last_bin + 1), 1))
        self.bin_map_arr.append(covered)

    smallest = min(len(covered) for covered in self.bin_map_arr)
    self.weigh_thresh_arr = [
        len(covered) / EBINS for covered in self.bin_map_arr
    ]
    self.weigh_bin_map_arr = [
        smallest / len(covered) for covered in self.bin_map_arr
    ]
    # Keep only the bin indices, in first-seen order.
    self.bin_map_arr = [list(covered) for covered in self.bin_map_arr]
def _prepare(self, ids, is_train=True, validation=False, cache=True):
    """Build, or load from cache, denoised per-second energy histograms.

    Each run's CSV is read without a header.  Column 0 is scaled by 1e-6 and
    cumulatively summed into an absolute event time (presumably microsecond
    gaps converted to seconds -- confirm against the data spec); column 1 is
    histogrammed into ``EBINS`` equal-width bins over ``[0, MAX_ENERGY]``.
    One histogram row is produced for every unit-length time slice starting
    at ``TOFFS``, and every row is passed through ``denoise_signal``.

    Args:
        ids: Iterable of integer run ids; run ``n`` is read from ``n.csv``.
        is_train: Read from the train directory when true, else from the
            test directory.  Also selects the cache file name.
        validation: When true the cache file is ``validation.pkl``; the
            input directory is still chosen by ``is_train``.
        cache: When true, pickle the freshly built result to the cache path.

    Returns:
        A list with one ``(tmax - TOFFS, EBINS)`` array per run, in the
        order of ``ids``.  If the cache file already exists its unpickled
        content is returned unchanged and ``ids`` is not consulted.
    """
    filename = "train.pkl" if is_train else "test.pkl"
    if validation:
        filename = "validation.pkl"
    tcache_path = os.path.join(self._base_path, filename)
    if os.path.exists(tcache_path):
        # NOTE: unpickling is only safe because this cache is written by the
        # code below; never point _base_path at untrusted files.
        with open(tcache_path, "rb") as fd:
            return pkl.load(fd)

    tpath = self._train_dir_path if is_train else self._test_dir_path
    # np.histogram returns len(edges) - 1 counts, so EBINS + 1 edges are
    # needed to fill a row of width EBINS (the previous EBINS edges produced
    # only EBINS - 1 counts per row).
    ebins = np.linspace(0, MAX_ENERGY, EBINS + 1)
    ret = []
    for run_id in tqdm(ids):
        dat = pd.read_csv(os.path.join(tpath, "%d.csv" % run_id), header=None)
        d1 = np.cumsum(dat[0] * 1e-6)
        d2 = dat[1]
        tmax = int(d1.values[-1])
        zmat = np.zeros((tmax - TOFFS, EBINS))
        for i in range(tmax - TOFFS):
            # Both comparisons are strict, so an event lying exactly on a
            # slice boundary is counted in neither neighbouring slice.
            dind = np.argwhere(
                (d1 > (TOFFS + i)) & (d1 < (TOFFS + i + 1))
            ).flatten()
            d3 = d2[dind]
            hist = np.histogram(d3, bins=ebins)[0]
            hist = denoise_signal(hist)
            zmat[i, :] = hist
        ret.append(zmat)

    if cache:
        with open(tcache_path, "wb") as fd:
            pkl.dump(ret, fd)
    return ret
def predict(self, x, ids, export=False):
    """Detect a source in every run, with a neural-network fallback.

    For each run and each time scale, every time slice from
    ``int(30 * tscale)`` onwards is reconstructed once with the background
    model (``model_bg`` / ``comps_bg``) and once with each per-source model
    in ``model_arr_bgs``.  The score of a source is the ratio of the two
    residual norms returned by ``_calc_source_norm``; the best-scoring
    source of a slice becomes a candidate.  Candidates whose score and
    background+source norm both exceed their thresholds are "confident".

    * If a run has confident candidates, the highest-scoring one wins.
    * Otherwise the highest-scoring unfiltered candidate is passed to the
      Keras model and accepted only if its output exceeds ``NN_PROBA``.
    * If a run produced no candidate at all (every matrix shorter than the
      first evaluated slice) the run is reported as "no source".

    Side effects: writes ``nn_stat.pkl`` (one ``(run id, probability)`` pair
    per run, ``(-1, -1)`` when the network was not consulted) and, when
    ``export`` is true, ``export.pkl`` with the per-run candidate lists.

    Args:
        x: Sequence of count matrices ordered run-major, i.e. the matrix of
            run position ``i`` and scale position ``j`` is
            ``x[len(TSCALE_LIST) * i + j]``.
        ids: Run ids, indexable by position.
        export: When true, dump the candidate lists to ``export.pkl``.

    Returns:
        A ``(len(ids), 2)`` float array.  Column 0 is ``1 + source index``
        (0 when nothing was detected); column 1 is the centre time of the
        winning slice (0 when nothing was detected).
    """
    model = load_model(
        "/mnt/ssd/radiologicalthreatsmm/weight_01-val_acc0.820.h5")
    n_scales = len(TSCALE_LIST)
    n_sources = len(self.model_arr_bgs)
    ret = np.zeros((len(ids), 2))
    nn_stat = []
    export_data = []
    for i in tqdm(range(len(ids))):
        run_id = ids[i]
        # Confident candidates (passed both thresholds).
        arr = []
        tiarr = []
        sourcearr = []
        tscalearr = []
        # Every candidate, kept for the NN fallback and for export.
        g_arr = []
        g_tiarr = []
        g_sourcearr = []
        g_tscalearr = []
        g_sres_bgs = []
        g_smooth_arr = []
        g_diff_fit_bg = []
        # range(1) keeps only the un-smoothed pass; the smoothed pass
        # (is_smooth == 1) is currently disabled.
        for is_smooth in range(1):
            for j, tscale in enumerate(TSCALE_LIST):
                dat = np.abs(x[n_scales * i + j])
                tmax = dat.shape[0]
                if is_smooth:
                    dat = np.abs(denoise_signal(dat))
                weigh = self.model_bg.transform(dat)
                weigh_arr_s = [
                    self.model_arr_bgs[source].transform(dat)
                    for source in range(n_sources)
                ]
                for ti in range(int(30 * tscale), tmax):
                    fit_bg = np.dot(weigh[ti], self.comps_bg)
                    diff_fit_bg = fit_bg - dat[ti, :]
                    sres = []
                    sres_bgs = []
                    for source in range(n_sources):
                        fit_bgs = np.dot(
                            weigh_arr_s[source][ti],
                            self.model_arr_bgs[source].components_)
                        diff_fit_bgs = fit_bgs - dat[ti, :]
                        norm_bg = self._calc_source_norm(diff_fit_bg, source)
                        norm_bgs = self._calc_source_norm(
                            diff_fit_bgs, source)
                        sres.append(norm_bg / norm_bgs)
                        sres_bgs.append(norm_bgs)
                    if not sres:
                        continue
                    sresi = np.argmax(sres)
                    thresh = SIGNAL_THRESHOLD_ARR[sresi + is_smooth * 6]
                    if sres_bgs[sresi] > BG_THRESHOLD and sres[sresi] > thresh:
                        arr.append(sres[sresi])
                        tiarr.append(ti / tscale)
                        sourcearr.append(sresi)
                        tscalearr.append(tscale)
                    g_arr.append(sres[sresi])
                    g_tiarr.append(ti / tscale)
                    g_sourcearr.append(sresi)
                    g_tscalearr.append(tscale)
                    g_sres_bgs.append(sres_bgs[sresi])
                    g_smooth_arr.append(is_smooth)
                    g_diff_fit_bg.append(diff_fit_bg)

        if arr:
            idx = np.argmax(arr)
            # Half a slice width moves the estimate to the slice centre.
            ret[i, 0] = 1 + sourcearr[idx]
            ret[i, 1] = tiarr[idx] + 1 / tscalearr[idx] * 0.5
            nn_stat.append((-1, -1))
        elif g_arr:
            idx = np.argmax(g_arr)
            diff_fit_bg = np.abs(g_diff_fit_bg[idx][:NN_BINS])
            proba = model.predict(diff_fit_bg.reshape(-1, NN_BINS, 1))[0][0]
            if proba > NN_PROBA:
                ret[i, 0] = 1 + g_sourcearr[idx]
                ret[i, 1] = g_tiarr[idx] + 1 / g_tscalearr[idx] * 0.5
            nn_stat.append((run_id, proba))
        else:
            # No slice was evaluated for this run; np.argmax on the empty
            # candidate list would raise ValueError, so report "no source".
            nn_stat.append((-1, -1))
        export_data.append([
            g_arr, g_tiarr, g_sourcearr, g_tscalearr, g_sres_bgs,
            g_smooth_arr
        ])

    if export:
        with open(os.path.join(self._base_path, "export.pkl"), "wb") as fd:
            pkl.dump(export_data, fd)
    # NOTE(review): written independently of `export`; confirm this matches
    # the intended nesting.
    if nn_stat:
        with open(os.path.join(self._base_path, "nn_stat.pkl"), "wb") as fd:
            pkl.dump(nn_stat, fd)
    return ret
def _get_train_tree_data(self, x, scaleidx, ids):
    """Assemble a class-balanced feature/label set for one time scale.

    Runs are split by their ``SourceID`` in ``_train_metadata``: non-zero
    ids are signal runs, zero ids are background runs.  Signal runs whose
    ``SourceTime`` is below ``TTHRESH`` are dropped, as is any run whose
    third-scale matrix has fewer rows than the reference time plus 10.
    Both groups are shuffled with ``np.random`` and truncated to the size
    of the smaller one.

    For every kept run the raw CSV is re-read and a window of
    ``TWIN_PER_SCALE[scaleidx]`` energy histograms is taken around the
    reference time (``SourceTime`` for signal runs, ``TTHRESH`` for
    background runs), stepping by ``TSTEP_PER_SCALE[scaleidx]``.  The
    window, its start times, the detected count-rate peak times and the run
    length are turned into one feature row by ``_row2record``.

    Args:
        x: Sequence of count matrices ordered run-major with
            ``len(TSCALE_LIST)`` entries per run.
        scaleidx: Index into ``TSCALE_LIST`` and the per-scale tables.
        ids: Run ids; position in this sequence selects the run's entries
            in ``x``.

    Returns:
        ``(features, labels)``, both built with ``np.vstack``; signal rows
        (label 1) come first, followed by background rows (label 0).
    """
    tscale = TSCALE_LIST[scaleidx]
    slice_width = 1 / tscale
    n_scales = len(TSCALE_LIST)
    meta = self._train_metadata

    signal_runs = []
    background_runs = []
    for pos, runid in enumerate(ids):
        source_id = meta.loc[runid]["SourceID"]
        source_time = meta.loc[runid]["SourceTime"]
        if source_id != 0:
            if source_time < TTHRESH:
                continue
            reference, bucket = source_time, signal_runs
        else:
            reference, bucket = TTHRESH, background_runs
        # The length check always looks at the run's third scale entry.
        if x[pos * n_scales + 2].shape[0] < (reference + 5 + 5):
            continue
        bucket.append((pos, runid))

    np.random.shuffle(signal_runs)
    np.random.shuffle(background_runs)
    keep = min(len(signal_runs), len(background_runs))
    labelled_groups = ((1, signal_runs[:keep]), (0, background_runs[:keep]))

    features = []
    labels = []
    for label, runs in labelled_groups:
        for _pos, runid in tqdm(runs, desc="train(%d)" % (scaleidx)):
            if label:
                t_center = meta.loc[runid]["SourceTime"]
            else:
                t_center = TTHRESH

            events = pd.read_csv(
                os.path.join(self._train_dir_path, "%d.csv" % runid),
                header=None)
            event_times = np.cumsum(events[0] * 1e-6)
            energies = events[1]
            energy_edges = np.linspace(0, MAX_ENERGY, EBINS + 1)
            t_end = event_times.values[-1]
            step = TSTEP_PER_SCALE[scaleidx]

            # Peaks of the denoised 1024-bin event-time histogram, converted
            # from bin index back to time.
            rate = denoise_signal(np.histogram(event_times, bins=1024)[0])
            peak_bins, _ = find_peaks(rate, prominence=(5))
            peak_times = peak_bins / 1024 * t_end

            window = TWIN_PER_SCALE[scaleidx]
            half_window = int(window / 2)
            spectra = []
            starts = []
            for k in range(window):
                start = t_center + (k - half_window) * step
                assert (start > TOFFS)
                assert (start + slice_width < t_end)
                selected = np.argwhere(
                    (event_times > start)
                    & (event_times < start + slice_width)).flatten()
                spectra.append(
                    np.histogram(energies[selected], bins=energy_edges)[0])
                starts.append(start)

            features.append(
                self._row2record(self.model_bgs, spectra, starts, peak_times,
                                 t_end))
            labels.append(label)

    return (np.vstack(features), np.vstack(labels))
def predict(self, x, ids, export=False):
    """Detect a source in every run using threshold rules only.

    For each run and each time scale, every time slice from
    ``int(30 * tscale)`` onwards is reconstructed once with the background
    model (``model_bg`` / ``comps_bg``) and once with each per-source model
    in ``model_arr_bgs``.  The score of a source is the ratio of the two
    residual norms returned by ``_calc_source_norm``; the best-scoring
    source of a slice becomes a candidate.  A candidate is accepted when its
    score exceeds ``SIGNAL_THRESHOLD_ARR`` for that source and its
    background+source norm exceeds ``BG_THRESHOLD``.  The highest-scoring
    accepted candidate of a run is reported.

    Args:
        x: Sequence of count matrices ordered run-major, i.e. the matrix of
            run position ``i`` and scale position ``j`` is
            ``x[len(TSCALE_LIST) * i + j]``.
        ids: Run ids; only their count is used here.
        export: When true, pickle the per-run candidate lists to
            ``export.pkl`` under ``_base_path``.

    Returns:
        A ``(len(ids), 2)`` float array.  Column 0 is ``1 + source index``
        (0 when nothing was detected); column 1 is the centre time of the
        winning slice (0 when nothing was detected).
    """
    n_scales = len(TSCALE_LIST)
    n_sources = len(self.model_arr_bgs)
    ret = np.zeros((len(ids), 2))
    export_data = []
    for i in tqdm(range(len(ids))):
        # Accepted candidates (passed both thresholds).
        arr = []
        tiarr = []
        sourcearr = []
        tscalearr = []
        # Every candidate, kept for export only.
        g_arr = []
        g_tiarr = []
        g_sourcearr = []
        g_tscalearr = []
        g_sres_bgs = []
        g_smooth_arr = []
        # range(1) keeps only the un-smoothed pass; the smoothed pass
        # (is_smooth == 1) is currently disabled.
        for is_smooth in range(1):
            for j, tscale in enumerate(TSCALE_LIST):
                dat = np.abs(x[n_scales * i + j])
                tmax = dat.shape[0]
                if is_smooth:
                    dat = np.abs(denoise_signal(dat))
                weigh = self.model_bg.transform(dat)
                weigh_arr_s = [
                    self.model_arr_bgs[source].transform(dat)
                    for source in range(n_sources)
                ]
                for ti in range(int(30 * tscale), tmax):
                    fit_bg = np.dot(weigh[ti], self.comps_bg)
                    diff_fit_bg = fit_bg - dat[ti, :]
                    sres = []
                    sres_bgs = []
                    for source in range(n_sources):
                        fit_bgs = np.dot(
                            weigh_arr_s[source][ti],
                            self.model_arr_bgs[source].components_)
                        diff_fit_bgs = fit_bgs - dat[ti, :]
                        norm_bg = self._calc_source_norm(diff_fit_bg, source)
                        norm_bgs = self._calc_source_norm(
                            diff_fit_bgs, source)
                        sres.append(norm_bg / norm_bgs)
                        sres_bgs.append(norm_bgs)
                    if not sres:
                        continue
                    sresi = np.argmax(sres)
                    thresh = SIGNAL_THRESHOLD_ARR[sresi + is_smooth * 6]
                    if sres_bgs[sresi] > BG_THRESHOLD and sres[sresi] > thresh:
                        arr.append(sres[sresi])
                        tiarr.append(ti / tscale)
                        sourcearr.append(sresi)
                        tscalearr.append(tscale)
                    g_arr.append(sres[sresi])
                    g_tiarr.append(ti / tscale)
                    g_sourcearr.append(sresi)
                    g_tscalearr.append(tscale)
                    g_sres_bgs.append(sres_bgs[sresi])
                    g_smooth_arr.append(is_smooth)

        if arr:
            idx = np.argmax(arr)
            # Half a slice width moves the estimate to the slice centre.
            ret[i, 0] = 1 + sourcearr[idx]
            ret[i, 1] = tiarr[idx] + 1 / tscalearr[idx] * 0.5
        export_data.append([
            g_arr, g_tiarr, g_sourcearr, g_tscalearr, g_sres_bgs,
            g_smooth_arr
        ])

    if export:
        # The context manager closes the file even if pickling fails.
        with open(os.path.join(self._base_path, "export.pkl"), "wb") as fd:
            pkl.dump(export_data, fd)
    return ret