def _add_cumulative_hits(matched_total, recall_total, hits, n_truth):
    """Add one sample's cumulative top-k hits to the running totals, in place.

    ``hits`` is a boolean array with one entry per ranked prediction (it may
    be shorter than ``len(matched_total)``).  Positions beyond the last
    prediction are filled with the final cumulative count.
    """
    cum_hits = np.cumsum(hits, dtype=np.uint64)
    n_preds = len(cum_hits)
    # A sample without ground-truth labels has zero hits; dividing by
    # max(n, 1) keeps its contribution at 0 instead of 0/0 = NaN.
    denom = max(n_truth, 1)
    matched_total[:n_preds] += cum_hits
    recall_total[:n_preds] += cum_hits / denom
    if n_preds != 0:
        matched_total[n_preds:] += cum_hits[-1]
        recall_total[n_preds:] += cum_hits[-1] / denom


def evaluate_tails(ans, preds, tails, topk=1):
    """Compute precision@k / recall@k separately for tail and non-tail samples.

    Each row of ``ans`` is split with ``get_in_tails(truth, tails)``
    (presumably the subset of the row's true labels that are tail labels --
    verify against that helper):

    * rows with no tail label are scored against their full label set
      (``prec`` / ``recall``);
    * rows with at least one tail label are scored twice: against the tail
      labels only (``t_prec`` / ``t_recall``) and against the full label set
      (``r_prec`` / ``r_recall``).

    Parameters
    ----------
    ans : CSR-like sparse matrix
        Ground truth; only ``shape``, ``indices`` and ``indptr`` are read.
    preds : CSR-like sparse matrix
        Predictions; the first ``topk`` stored column indices of each row are
        taken as the ranked predictions, in stored order.
    tails
        Passed through unchanged to ``get_in_tails``.
    topk : int
        Number of ranks to evaluate.

    Returns
    -------
    tuple of ndarray
        ``(t_prec, t_recall, prec, recall, r_prec, r_recall)``, each of
        length ``topk`` and rounded to 4 decimals.  If one of the two groups
        is empty its metrics are NaN (division by a zero sample count).
    """
    total_matched = np.zeros(topk, dtype=np.uint64)
    t_total_matched = np.zeros(topk, dtype=np.uint64)
    r_total_matched = np.zeros(topk, dtype=np.uint64)
    recall = np.zeros(topk, dtype=np.float64)
    t_recall = np.zeros(topk, dtype=np.float64)
    r_recall = np.zeros(topk, dtype=np.float64)
    q = 0  # rows with at least one tail label
    p = 0  # rows without any tail label
    for i in trange(ans.shape[0]):
        truth = ans.indices[ans.indptr[i]:ans.indptr[i + 1]]
        tail_truth = get_in_tails(truth, tails)
        t_preds = preds.indices[preds.indptr[i]:preds.indptr[i + 1]][:topk]
        if not len(tail_truth):
            p += 1
            _add_cumulative_hits(total_matched, recall,
                                 np.isin(t_preds, truth), len(truth))
            continue
        q += 1
        _add_cumulative_hits(t_total_matched, t_recall,
                             np.isin(t_preds, tail_truth), len(tail_truth))
        _add_cumulative_hits(r_total_matched, r_recall,
                             np.isin(t_preds, truth), len(truth))

    ranks = np.arange(1, topk + 1)
    t_prec = t_total_matched / q / ranks
    t_recall = t_recall / q
    r_prec = r_total_matched / q / ranks
    r_recall = r_recall / q
    prec = total_matched / p / ranks
    recall = recall / p
    print('preds in tails:', q)
    print('preds in non-tails:', p)
    return (np.round(t_prec, 4), np.round(t_recall, 4),
            np.round(prec, 4), np.round(recall, 4),
            np.round(r_prec, 4), np.round(r_recall, 4))
def reduce(PSI_l, Xl, coverage_threshold):
    """
    Computes set cover reduction to get the most relevant samples that define the class Xl.

    A greedy set cover is used: at every step the sample whose coverage set
    contains the largest number of still-uncovered samples is selected, until
    every sample is covered or no remaining sample adds coverage.

    :param PSI_l: (Nl x 2) matrix containing both the scale and the shape of the weibull distribution
    :param Xl: (Nl x dimension_feature_vector) matrix containing the feature vectors of each instance of a class
    :param coverage_threshold: Probability above which we consider an instance to be not enough representative of its class
    :return: The indexes (rows of Xl) of the most representative samples of a class, in selection order
    """
    # This matrix D is symmetric
    D = ppp_cosine_similarity(Xl, Xl)
    # Number of instances of the class
    Nl = np.shape(D)[0]

    # S[i] holds every sample j that sample i covers, i.e. for which
    # psi_i_dist(...) reaches the coverage threshold.
    S = []
    for i in range(Nl):
        covered_by_i = set()
        for j in range(Nl):
            if psi_i_dist(D[i, j], PSI_l[i, 0], PSI_l[i, 1]) >= coverage_threshold:
                covered_by_i.add(j)
        S.append(covered_by_i)

    # Samples of the universe that no selected sample covers yet
    uncovered = set(range(Nl))
    # Final indexes
    I = []

    # Greedy set cover
    while uncovered:
        best_index = None
        best_gain = 0
        for index_s, s in enumerate(S):
            # Gain = number of still-uncovered samples this subset would add
            gain = len(s & uncovered)
            # ">=" keeps the original tie-break: the later subset wins
            if gain > 0 and gain >= best_gain:
                best_index = index_s
                best_gain = gain
        if best_index is None:
            # The remaining samples are not covered by any subset
            break
        I.append(best_index)
        uncovered -= S[best_index]
    return I
def evaluate(ans, preds, topk=1):
    """Compute precision@k and recall@k for k = 1..topk.

    Parameters
    ----------
    ans : CSR-like sparse matrix
        Ground truth; the stored column indices of row ``i`` are its labels.
    preds : CSR-like sparse matrix
        Predictions; the first ``topk`` stored column indices of row ``i``
        are taken as its ranked predictions, in stored order.
    topk : int
        Number of ranks to evaluate.

    Returns
    -------
    (prec, recall) : tuple of ndarray
        Arrays of length ``topk`` rounded to 4 decimals, averaged over all
        rows of ``ans``.  Rows with fewer than ``topk`` predictions carry
        their last cumulative hit count forward to the remaining ranks.
        Rows without ground-truth labels contribute 0 to both metrics.
    """
    n_rows = ans.shape[0]
    total_matched = np.zeros(topk, dtype=np.uint64)
    recall = np.zeros(topk, dtype=np.float64)
    for i in range(n_rows):
        truth = ans.indices[ans.indptr[i]:ans.indptr[i + 1]]
        top_preds = preds.indices[preds.indptr[i]:preds.indptr[i + 1]][:topk]
        cum_matched = np.cumsum(np.isin(top_preds, truth), dtype=np.uint64)
        n_preds = len(cum_matched)
        # A row without labels has zero hits; max(.., 1) avoids 0/0 = NaN
        # leaking into the averaged recall.
        n_truth = max(len(truth), 1)
        total_matched[:n_preds] += cum_matched
        recall[:n_preds] += cum_matched / n_truth
        if n_preds != 0:
            total_matched[n_preds:] += cum_matched[-1]
            recall[n_preds:] += cum_matched[-1] / n_truth
    prec = total_matched / n_rows / np.arange(1, topk + 1)
    recall = recall / n_rows
    return np.round(prec, 4), np.round(recall, 4)
def generate(cls, tY, pY, topk=10):
    """Build a metrics object holding precision@k and recall@k for k = 1..topk.

    Parameters
    ----------
    cls : callable
        Invoked as ``cls(prec=..., recall=...)`` to build the return value.
    tY : smat.csr_matrix
        Ground truth; the stored column indices of row ``i`` are its labels.
    pY : smat.csr_matrix
        Predictions with the same shape as ``tY``; the first ``topk`` stored
        column indices of row ``i`` are taken as its ranked predictions, in
        stored order.
    topk : int
        Number of ranks to evaluate.

    Returns
    -------
    The object returned by ``cls``; ``prec`` and ``recall`` are arrays of
    length ``topk`` averaged over all rows.  Rows with fewer than ``topk``
    predictions carry their last cumulative hit count forward, and rows
    without ground-truth labels contribute 0 to both metrics.
    """
    assert isinstance(tY, smat.csr_matrix), type(tY)
    assert isinstance(pY, smat.csr_matrix), type(pY)
    assert tY.shape == pY.shape, "tY.shape = {}, pY.shape = {}".format(
        tY.shape, pY.shape)
    n_rows = tY.shape[0]
    total_matched = sp.zeros(topk, dtype=sp.uint64)
    recall = sp.zeros(topk, dtype=sp.float64)
    for i in range(n_rows):
        truth = tY.indices[tY.indptr[i]:tY.indptr[i + 1]]
        top_preds = pY.indices[pY.indptr[i]:pY.indptr[i + 1]][:topk]
        cum_matched = sp.cumsum(sp.isin(top_preds, truth), dtype=sp.uint64)
        n_preds = len(cum_matched)
        # A row without labels has zero hits; max(.., 1) avoids 0/0 = NaN
        # leaking into the averaged recall.
        n_truth = max(len(truth), 1)
        total_matched[:n_preds] += cum_matched
        recall[:n_preds] += cum_matched / n_truth
        if n_preds != 0:
            total_matched[n_preds:] += cum_matched[-1]
            recall[n_preds:] += cum_matched[-1] / n_truth
    prec = total_matched / n_rows / sp.arange(1, topk + 1)
    recall = recall / n_rows
    return cls(prec=prec, recall=recall)
def trim_nonpercolating_paths(im, inlet_axis=0, outlet_axis=0):
    r"""
    Removes all nonpercolating paths between specified edges

    This function is essential when performing transport simulations on an
    image, since image regions that do not span between the desired inlet and
    outlet do not contribute to the transport.

    Parameters
    ----------
    im : ND-array
        The image of the porous material with ``True`` values indicating the
        phase of interest

    inlet_axis : int
        Inlet axis of boundary condition.  The inlet is the first slice
        (index 0) along this axis.  For three dimensional image the number
        ranges from 0 to 2. For two dimensional image the range is between
        0 to 1.

    outlet_axis : int
        Outlet axis of boundary condition.  The outlet is the last slice
        (index -1) along this axis.  For three dimensional image the number
        ranges from 0 to 2. For two dimensional image the range is between
        0 to 1.

    Returns
    -------
    image : ND-array
        A copy of ``im`` with all the nonpercolating paths removed.  Only
        connected regions that touch both the inlet and the outlet face are
        kept.

    See Also
    --------
    find_disconnected_voxels
    trim_floating_solid
    trim_blind_pores

    """
    im = trim_floating_solid(~im)
    labels = spim.label(~im)[0]

    # Boolean masks marking the inlet and outlet faces.  An axis that does
    # not exist in the image leaves its mask empty, so nothing percolates.
    inlet = sp.zeros_like(im, dtype=bool)
    outlet = sp.zeros_like(im, dtype=bool)
    for axis in range(im.ndim):
        face = [slice(None)] * im.ndim
        if inlet_axis == axis:
            face[axis] = 0
            inlet[tuple(face)] = True
        if outlet_axis == axis:
            face[axis] = -1
            outlet[tuple(face)] = True

    IN = sp.unique(labels[inlet])
    OUT = sp.unique(labels[outlet])
    # A region percolates only if it touches BOTH faces.  Label 0 is the
    # unlabelled background and is never a path.
    percolating = (set(IN) & set(OUT)) - {0}
    return sp.isin(labels, list(percolating))
def _load_dla_table(filename, z_dla_key, args):
    """Read a DLA master table, apply the catalogue cuts and add HEALPix pixels.

    ``z_dla_key`` names the column copied into ``data['Z']``.  Rows are sorted
    by ``MOCKID`` then ``Z_QSO_RSD``, so rows of one skewer are contiguous.
    """
    with fitsio.FITS(filename) as h:
        master = sp.sort(h[1].read(), order=['MOCKID', 'Z_QSO_RSD'])

    data = {}
    for k in ['RA', 'DEC']:
        data[k] = master[k][:]
    # Every identifier column is filled from MOCKID.
    for k in ['THING_ID', 'PLATE', 'MJD', 'FIBERID']:
        data[k] = master['MOCKID'][:]
    data['Z'] = master[z_dla_key][:]
    data['Z_QSO'] = master['Z_QSO_RSD'][:]

    # Ensure that DLAs are in the rest frame wavelength range if required
    w = sp.ones(data['Z_QSO'].shape).astype('bool')
    lr_DLA = lya * (1 + data['Z']) / (1 + data['Z_QSO'])
    if args.DLA_lambda_rest_min is not None:
        w *= (lr_DLA > args.DLA_lambda_rest_min)
    if args.DLA_lambda_rest_max is not None:
        w *= (lr_DLA < args.DLA_lambda_rest_max)
    w *= data['Z'] > args.min_cat_z
    data = {k: v[w] for k, v in data.items()}

    phi = data['RA'] * sp.pi / 180.
    th = sp.pi / 2. - data['DEC'] * sp.pi / 180.
    data['PIX'] = healpy.ang2pix(args.nside, th, phi)
    return data


def _select_single_dla_per_skw(thing_id, candidates, rng):
    """Return a mask over ``thing_id`` keeping one candidate row per skewer.

    ``candidates`` is a boolean mask of eligible rows.  Rows sharing a
    THING_ID must be contiguous.  Within each run one row is drawn uniformly
    by reservoir sampling: the n-th member replaces the current pick with
    probability 1/n.
    """
    rows = sp.arange(len(thing_id))[candidates]
    chosen = []
    current_id = None
    pick = None
    n_seen = 0
    for row in rows:
        m = thing_id[row]
        if n_seen and m == current_id:
            n_seen += 1
            if rng.uniform() < 1.0 / n_seen:
                pick = row
        else:
            if n_seen:
                chosen.append(pick)
            current_id, pick, n_seen = m, row, 1
    # Flush the pick of the last skewer as well.
    if n_seen:
        chosen.append(pick)
    keep = sp.zeros(len(thing_id), dtype=bool)
    keep[chosen] = True
    return keep


def _dla_suffix(args):
    """Build the file-name suffix describing the DLA selection options."""
    suffix = ''
    if args.single_DLA_per_skw:
        suffix += '_single'
    if args.DLA_lambda_rest_min is not None:
        suffix += '_lrmin{}'.format(args.DLA_lambda_rest_min)
    if args.DLA_lambda_rest_max is not None:
        suffix += '_lrmax{}'.format(args.DLA_lambda_rest_max)
    return suffix


def _write_zcat(filename, data, rows, skip):
    """Write the selected ``rows`` of every column not in ``skip`` to a FITS file."""
    cols = [v[rows] for k, v in data.items() if k not in skip]
    names = [k for k in data.keys() if k not in skip]
    with fitsio.FITS(filename, 'rw', clobber=True) as out:
        out.write(cols, names=names)


def create_dla_cat(args):
    """Write the DLA catalogue and, optionally, the QSO and DLA random catalogues.

    Reads ``master_DLA.fits`` from ``args.in_dir``, keeps DLAs that pass the
    redshift and rest-frame wavelength cuts and whose THING_ID is in
    ``w_thid``, optionally keeps a single DLA per skewer, and writes
    ``zcat_DLA_<downsampling><suffix>.fits`` to ``args.out_dir``.  When
    ``args.make_randoms_zcats`` is set, the same is done for the randoms in
    ``args.randoms_dir``.

    NOTE(review): ``w_thid``, ``state`` and ``lya`` are not defined in this
    function and must exist at module level.  ``w_thid`` is presumably the
    THING_IDs of the QSOs kept by the QSO downsampling, and ``state`` a
    ``RandomState`` -- confirm against the rest of the file.

    :param args: parsed options; the attributes read are ``in_dir``,
        ``out_dir``, ``nside``, ``min_cat_z``, ``downsampling``,
        ``DLA_lambda_rest_min``, ``DLA_lambda_rest_max``,
        ``single_DLA_per_skw``, ``make_randoms_zcats``, ``randoms_dir``,
        ``randoms_downsampling`` and ``randoms_downsampling_seed``.
    :return: None
    """
    ### DLA data
    data = _load_dla_table(args.in_dir + '/master_DLA.fits', 'Z_DLA_RSD', args)
    print('INFO: {} DLA in mocks data'.format(data['RA'].size))

    ### Save DLA data
    # The THING_ID mask is built first; the per-skewer selection needs it.
    w_DLA = sp.isin(data['THING_ID'], w_thid)
    if args.single_DLA_per_skw:
        w_DLA = _select_single_dla_per_skw(data['THING_ID'], w_DLA, state)
    N_DLA = sp.sum(w_DLA)
    print('INFO: downsampling leaves {} DLAs in catalog'.format(N_DLA))
    _write_zcat(
        args.out_dir + '/zcat_DLA_{}{}.fits'.format(args.downsampling,
                                                   _dla_suffix(args)),
        data, w_DLA, ['PIX', 'Z_QSO'])

    if args.make_randoms_zcats:
        r_state = sp.random.RandomState(args.randoms_downsampling_seed)

        ### Data
        with fitsio.FITS(args.randoms_dir + '/master_randoms.fits') as h:
            mr_data = sp.sort(h[1].read(), order=['MOCKID', 'Z'])
        data = {}
        for k in ['RA', 'DEC']:
            data[k] = mr_data[k][:]
        for k in ['THING_ID', 'PLATE', 'MJD', 'FIBERID']:
            data[k] = mr_data['MOCKID'][:]
        data['Z'] = mr_data['Z'][:]
        w = data['Z'] > args.min_cat_z
        data = {k: v[w] for k, v in data.items()}
        phi = data['RA'] * sp.pi / 180.
        th = sp.pi / 2. - data['DEC'] * sp.pi / 180.
        data['PIX'] = healpy.ang2pix(args.nside, th, phi)
        print('INFO: {} QSO in randoms'.format(data['RA'].size))

        ### Get reduced data numbers
        original_nbData = data['RA'].shape[0]
        nbData = round(original_nbData * args.randoms_downsampling)

        ### Save data
        assert nbData <= data['RA'].size
        w = r_state.choice(sp.arange(data['RA'].size), size=nbData,
                           replace=False)
        print(
            'INFO: downsampling to {} QSOs in randoms catalog'.format(nbData))
        _write_zcat(
            args.out_dir + '/zcat_{}_randoms.fits'.format(
                args.randoms_downsampling),
            data, w, ['PIX'])

        ### DLA randoms
        data = _load_dla_table(args.randoms_dir + '/master_DLA_randoms.fits',
                               'Z_DLA', args)
        print('INFO: {} DLA in randoms'.format(data['RA'].size))

        ### Save DLA data
        w_DLA = sp.isin(data['THING_ID'], w_thid)
        if args.single_DLA_per_skw:
            # NOTE(review): this draws from ``state`` as the original did;
            # ``r_state`` may have been intended for the randoms -- confirm.
            w_DLA = _select_single_dla_per_skw(data['THING_ID'], w_DLA, state)
        #Then downsample using a modified ratio to take into account the removal of QSOs.
        mod_r_ds = args.randoms_downsampling / args.downsampling
        w_DLA *= r_state.choice([0, 1],
                                size=data['THING_ID'].shape[0],
                                replace=True,
                                p=[1 - mod_r_ds, mod_r_ds]).astype('bool')
        print('INFO: downsampling leaves {} DLAs in randoms catalog'.format(
            sp.sum(w_DLA)))
        _write_zcat(
            args.out_dir + '/zcat_DLA_{}_randoms{}.fits'.format(
                args.randoms_downsampling, _dla_suffix(args)),
            data, w_DLA, ['PIX', 'Z_QSO'])