示例#1
0
def _accumulate_topk_hits(hit_totals, recall_totals, top_preds, relevant):
    """Add one row's cumulative top-k hits and recall to the running totals.

    :param hit_totals: length-``topk`` unsigned accumulator, updated in place.
    :param recall_totals: length-``topk`` float accumulator, updated in place.
    :param top_preds: predicted label ids of the row, already cut to ``topk``.
    :param relevant: label ids that count as correct for this row.
    """
    cum_hits = np.cumsum(np.isin(top_preds, relevant), dtype=np.uint64)
    n_preds = len(cum_hits)
    if n_preds == 0:
        # No predictions: the row contributes zero hits at every cut-off.
        return
    # A row without relevant labels would otherwise yield 0/0 = NaN and
    # poison the recall sums for the whole data set.
    n_relevant = max(len(relevant), 1)
    hit_totals[:n_preds] += cum_hits
    recall_totals[:n_preds] += cum_hits / n_relevant
    # Fewer than topk predictions: carry the final count to the larger k's.
    hit_totals[n_preds:] += cum_hits[-1]
    recall_totals[n_preds:] += cum_hits[-1] / n_relevant


def evaluate_tails(ans, preds, tails, topk=1):
    """Compute precision/recall@k separately for tail and non-tail rows.

    A row is a "tail" row when ``get_in_tails(truth, tails)`` returns a
    non-empty collection for its truth labels. Three sets of metrics are
    accumulated:

    * non-tail rows, predictions scored against all truth labels;
    * tail rows, predictions scored against the tail truth labels only;
    * tail rows, predictions scored against all truth labels.

    Only the first ``topk`` stored indices of each prediction row are used,
    so ``preds`` is presumably sorted by descending score within each row --
    verify against the caller.

    :param ans: CSR-style matrix (``indices``/``indptr``/``shape``) of truth
        labels, one row per instance.
    :param preds: CSR-style matrix of predicted labels with the same rows.
    :param tails: passed through to ``get_in_tails``.
    :param topk: largest cut-off k to evaluate.
    :return: ``(t_prec, t_recall, prec, recall, r_prec, r_recall)``, each a
        length-``topk`` array rounded to 4 decimals. A group without rows
        reports zeros instead of NaN.
    """
    total_matched = np.zeros(topk, dtype=np.uint64)
    t_total_matched = np.zeros(topk, dtype=np.uint64)
    r_total_matched = np.zeros(topk, dtype=np.uint64)
    recall = np.zeros(topk, dtype=np.float64)
    t_recall = np.zeros(topk, dtype=np.float64)
    r_recall = np.zeros(topk, dtype=np.float64)
    q = 0  # number of tail rows
    p = 0  # number of non-tail rows
    for i in trange(ans.shape[0]):
        truth = ans.indices[ans.indptr[i]:ans.indptr[i + 1]]
        tail_truth = get_in_tails(truth, tails)
        top_preds = preds.indices[preds.indptr[i]:preds.indptr[i + 1]][:topk]
        if not len(tail_truth):
            p += 1
            _accumulate_topk_hits(total_matched, recall, top_preds, truth)
            # Always skip the tail bookkeeping for non-tail rows; previously a
            # non-tail row without predictions was also counted as a tail row.
            continue
        q += 1
        _accumulate_topk_hits(t_total_matched, t_recall, top_preds, tail_truth)
        _accumulate_topk_hits(r_total_matched, r_recall, top_preds, truth)

    ranks = np.arange(1, topk + 1)
    # Avoid dividing by zero when one of the two groups is empty.
    n_tail = max(q, 1)
    n_non_tail = max(p, 1)
    t_prec = t_total_matched / n_tail / ranks
    t_recall = t_recall / n_tail
    r_prec = r_total_matched / n_tail / ranks
    r_recall = r_recall / n_tail
    prec = total_matched / n_non_tail / ranks
    recall = recall / n_non_tail
    print('preds in tails:', q)
    print('preds in non-tails:', p)
    return (np.round(t_prec, 4), np.round(t_recall, 4), np.round(prec, 4),
            np.round(recall, 4), np.round(r_prec, 4), np.round(r_recall, 4))
示例#2
0
def reduce(PSI_l, Xl, coverage_threshold):
    """
    Computes a greedy set cover reduction to get the most relevant samples
    that define the class Xl.

    Sample ``i`` is said to cover sample ``j`` when
    ``psi_i_dist(D[i, j], PSI_l[i, 0], PSI_l[i, 1]) >= coverage_threshold``.
    At every step the sample covering the largest number of still-uncovered
    samples is selected (the later sample wins ties), until every sample is
    covered or no remaining sample can cover anything new.

    :param PSI_l: (Nl x 2) matrix containing both the scale and the shape of the weibull distribution
    :param Xl: (Nl x dimension_feature_vector) matrix containing the feature vectors of each instance of a class
    :param coverage_threshold: Value that ``psi_i_dist`` must reach for a sample to count as covered by another one
    :return: List with the indexes (rows of ``Xl``) of the selected samples, in selection order
    """
    # This matrix D is symmetric
    D = ppp_cosine_similarity(Xl, Xl)
    # Number of instances of the class
    Nl = np.shape(D)[0]

    # covers[i] holds every sample j that sample i makes redundant.
    covers = []
    for i in range(Nl):
        covers.append({
            j for j in range(Nl)
            if psi_i_dist(D[i, j], PSI_l[i, 0], PSI_l[i, 1]) >=
            coverage_threshold
        })

    uncovered = set(range(Nl))
    selected = []
    while uncovered:
        best_index = -1
        best_gain = 0
        for index, covered_by_index in enumerate(covers):
            gain = len(covered_by_index & uncovered)
            # gain > 0 also excludes samples that were already selected.
            if gain and gain >= best_gain:
                best_index, best_gain = index, gain
        if best_index < 0:
            # The leftover samples are not covered by any sample: stop here
            # instead of appending samples that add nothing.
            break
        # Indexes always refer to the original rows; nothing is removed from
        # ``covers``, so positions never shift.
        selected.append(best_index)
        uncovered -= covers[best_index]
    return selected
示例#3
0
def evaluate(ans, preds, topk=1):
    """Compute precision@k and recall@k for k = 1..topk.

    Only the first ``topk`` stored indices of each prediction row are used,
    so ``preds`` is presumably sorted by descending score within each row --
    verify against the caller.

    :param ans: CSR-style matrix (``indices``/``indptr``/``shape``) of truth
        labels, one row per instance.
    :param preds: CSR-style matrix of predicted labels with the same rows.
    :param topk: largest cut-off k to evaluate.
    :return: ``(prec, recall)``, two length-``topk`` arrays rounded to
        4 decimals. Rows without truth labels contribute zero recall, and an
        input without rows yields zeros (instead of NaN in both cases).
    """
    n_rows = ans.shape[0]
    total_matched = np.zeros(topk, dtype=np.uint64)
    recall = np.zeros(topk, dtype=np.float64)
    for i in range(n_rows):
        truth = ans.indices[ans.indptr[i]:ans.indptr[i + 1]]
        top_preds = preds.indices[preds.indptr[i]:preds.indptr[i + 1]][:topk]
        cum_matched = np.cumsum(np.isin(top_preds, truth), dtype=np.uint64)
        n_preds = len(cum_matched)
        if n_preds == 0:
            # No predictions: zero hits at every cut-off.
            continue
        # Guard the 0/0 that an unlabeled row would otherwise produce.
        n_truth = max(len(truth), 1)
        total_matched[:n_preds] += cum_matched
        recall[:n_preds] += cum_matched / n_truth
        # Fewer than topk predictions: carry the last count to the larger k's.
        total_matched[n_preds:] += cum_matched[-1]
        recall[n_preds:] += cum_matched[-1] / n_truth
    denom = max(n_rows, 1)
    prec = total_matched / denom / np.arange(1, topk + 1)
    recall = recall / denom
    return np.round(prec, 4), np.round(recall, 4)
示例#4
0
 def generate(cls, tY, pY, topk=10):
     """Build a metrics object holding precision@k and recall@k, k = 1..topk.

     Only the first ``topk`` stored indices of each row of ``pY`` are used,
     so ``pY`` is presumably sorted by descending score within each row --
     verify against the caller.

     :param tY: ``csr_matrix`` of truth labels, one row per instance.
     :param pY: ``csr_matrix`` of predicted labels, same shape as ``tY``.
     :param topk: largest cut-off k to evaluate.
     :return: ``cls(prec=..., recall=...)`` with two length-``topk`` arrays.
         Rows without truth labels contribute zero recall, and an input
         without rows yields zeros (instead of NaN in both cases).
     """
     # Imported locally: the NumPy aliases in the ``scipy`` namespace that
     # this method used before are deprecated.
     import numpy as np

     assert isinstance(tY, smat.csr_matrix), type(tY)
     assert isinstance(pY, smat.csr_matrix), type(pY)
     assert tY.shape == pY.shape, "tY.shape = {}, pY.shape = {}".format(
         tY.shape, pY.shape)
     n_rows = tY.shape[0]
     total_matched = np.zeros(topk, dtype=np.uint64)
     recall = np.zeros(topk, dtype=np.float64)
     for i in range(n_rows):
         truth = tY.indices[tY.indptr[i]:tY.indptr[i + 1]]
         top_preds = pY.indices[pY.indptr[i]:pY.indptr[i + 1]][:topk]
         cum_matched = np.cumsum(np.isin(top_preds, truth), dtype=np.uint64)
         n_preds = len(cum_matched)
         if n_preds == 0:
             # No predictions: zero hits at every cut-off.
             continue
         # Guard the 0/0 that an unlabeled row would otherwise produce.
         n_truth = max(len(truth), 1)
         total_matched[:n_preds] += cum_matched
         recall[:n_preds] += cum_matched / n_truth
         # Fewer than topk predictions: carry the last count to larger k's.
         total_matched[n_preds:] += cum_matched[-1]
         recall[n_preds:] += cum_matched[-1] / n_truth
     denom = max(n_rows, 1)
     prec = total_matched / denom / np.arange(1, topk + 1)
     recall = recall / denom
     return cls(prec=prec, recall=recall)
示例#5
0
def trim_nonpercolating_paths(im, inlet_axis=0, outlet_axis=0):
    r"""
    Removes all nonpercolating paths between specified edges

    This function is essential when performing transport simulations on an
    image, since image regions that do not span between the desired inlet and
    outlet do not contribute to the transport.

    Parameters
    ----------
    im : ND-array
        The image of the porous material with ``True`` values indicating the
        phase of interest

    inlet_axis : int
        Inlet axis of boundary condition. For three dimensional image the
        number ranges from 0 to 2. For two dimensional image the range is
        between 0 to 1. The inlet is the first slice along this axis.

    outlet_axis : int
        Outlet axis of boundary condition. For three dimensional image the
        number ranges from 0 to 2. For two dimensional image the range is
        between 0 to 1. The outlet is the last slice along this axis.

    Returns
    -------
    image : ND-array
        A copy of ``im`` with all the nonpercolating paths removed

    Notes
    -----
    Only labelled clusters that touch both the inlet and the outlet face are
    kept. Clusters touching neither face (for instance those that only reach
    a side face) used to survive and are now removed as well. An axis outside
    ``0 .. im.ndim - 1`` selects no face, so nothing percolates.

    See Also
    --------
    find_disconnected_voxels
    trim_floating_solid
    trim_blind_pores

    """
    # Imported locally: the NumPy aliases in the ``scipy`` namespace that
    # this function used before are deprecated.
    import numpy as np

    # NOTE(review): applied to the inverted image this presumably fills the
    # clusters of ``im`` that reach no image border -- confirm against the
    # definition of trim_floating_solid.
    solid = trim_floating_solid(~im)
    labels = spim.label(~solid)[0]

    def labels_on_face(axis, position):
        # Labels found on one boundary slice of the image.
        if not 0 <= axis < labels.ndim:
            return set()
        face = [slice(None)] * labels.ndim
        face[axis] = position
        return set(np.unique(labels[tuple(face)]))

    inlet_labels = labels_on_face(inlet_axis, 0)
    outlet_labels = labels_on_face(outlet_axis, -1)
    # Label 0 is the background of ``spim.label``, not a cluster.
    percolating = (inlet_labels & outlet_labels) - {0}
    return np.isin(labels, list(percolating))
def _dla_zcat_suffix(args):
    """Build the file-name suffix that encodes the DLA selection options."""
    suffix = ''
    if args.single_DLA_per_skw:
        suffix += '_single'
    if args.DLA_lambda_rest_min is not None:
        suffix += '_lrmin{}'.format(args.DLA_lambda_rest_min)
    if args.DLA_lambda_rest_max is not None:
        suffix += '_lrmax{}'.format(args.DLA_lambda_rest_max)
    return suffix


def _add_healpix_pixels(data, nside):
    """Store the HEALPix pixel of every row (from RA/DEC in degrees) as 'PIX'."""
    import numpy as np

    phi = data['RA'] * np.pi / 180.
    th = np.pi / 2. - data['DEC'] * np.pi / 180.
    data['PIX'] = healpy.ang2pix(nside, th, phi)


def _read_dla_catalog(path, z_dla_column, args):
    """Read a master DLA table and apply the redshift and rest-frame cuts.

    Returns a dict of equally long column arrays. Rows are sorted by
    ('MOCKID', 'Z_QSO_RSD'), so all DLAs of one skewer are adjacent.
    """
    import numpy as np

    with fitsio.FITS(path) as h:
        # np.sort returns a copy, so the table outlives the closed file.
        table = np.sort(h[1].read(), order=['MOCKID', 'Z_QSO_RSD'])

    data = {}
    for k in ['RA', 'DEC']:
        data[k] = table[k][:]
    for k in ['THING_ID', 'PLATE', 'MJD', 'FIBERID']:
        data[k] = table['MOCKID'][:]
    data['Z'] = table[z_dla_column][:]
    data['Z_QSO'] = table['Z_QSO_RSD'][:]

    # Ensure that DLAs are in the rest frame wavelength range if required
    lr_DLA = lya * (1 + data['Z']) / (1 + data['Z_QSO'])
    w = data['Z'] > args.min_cat_z
    if args.DLA_lambda_rest_min is not None:
        w &= lr_DLA > args.DLA_lambda_rest_min
    if args.DLA_lambda_rest_max is not None:
        w &= lr_DLA < args.DLA_lambda_rest_max
    for k in data:
        data[k] = data[k][w]

    _add_healpix_pixels(data, args.nside)
    return data


def _pick_one_per_skewer(thing_ids, candidates, rng):
    """Pick one candidate row per run of equal ids, uniformly at random.

    Returns the positions (into ``thing_ids``) of the picked rows. Rows whose
    ``candidates`` flag is false are ignored.
    """
    picked = []
    current_id = None
    pick = None
    group_size = 0
    for pos, (thing_id, is_candidate) in enumerate(zip(thing_ids,
                                                       candidates)):
        if not is_candidate:
            continue
        if group_size and thing_id == current_id:
            group_size += 1
            # Reservoir sampling: the n-th member replaces the current pick
            # with probability 1/n, which makes every member equally likely.
            if rng.uniform() < 1 / group_size:
                pick = pos
        else:
            if group_size:
                picked.append(pick)
            current_id, pick, group_size = thing_id, pos, 1
    # The last group has no successor to trigger its append.
    if group_size:
        picked.append(pick)
    return picked


def _select_dla_rows(data, single_per_skw):
    """Boolean mask of the DLAs to keep in the output catalogue."""
    import numpy as np

    # NOTE(review): ``w_thid`` and ``state`` are not defined in this block;
    # as in the original code they are expected to exist at module level.
    on_kept_qso = np.isin(data['THING_ID'], w_thid)
    if not single_per_skw:
        return on_kept_qso
    picked = _pick_one_per_skewer(data['THING_ID'], on_kept_qso, state)
    return np.isin(np.arange(len(data['THING_ID'])), picked)


def _write_zcat(path, data, rows, excluded):
    """Write the selected rows of every non-excluded column to a FITS file."""
    names = [k for k in data if k not in excluded]
    cols = [data[k][rows] for k in names]
    with fitsio.FITS(path, 'rw', clobber=True) as out:
        out.write(cols, names=names)


def create_dla_cat(args):
    """Write the DLA redshift catalogue and, optionally, the random catalogues.

    Reads ``master_DLA.fits`` from ``args.in_dir``, keeps the DLAs that pass
    the redshift / rest-frame wavelength cuts and whose THING_ID is in the
    module-level ``w_thid``, optionally keeps a single random DLA per skewer
    (``args.single_DLA_per_skw``), and writes the result to ``args.out_dir``.
    With ``args.make_randoms_zcats`` the same is done for the QSO randoms and
    the DLA randoms found in ``args.randoms_dir``.

    Compared with the previous version, the single-DLA-per-skewer branch no
    longer reads ``w_DLA`` before it is assigned, no longer drops the last
    skewer, picks uniformly within a skewer and applies the picked positions
    to the full table; the randoms no longer reuse the data catalogue's mask.

    :param args: parsed command-line options (attributes read: in_dir,
        out_dir, randoms_dir, nside, min_cat_z, DLA_lambda_rest_min,
        DLA_lambda_rest_max, single_DLA_per_skw, downsampling,
        make_randoms_zcats, randoms_downsampling, randoms_downsampling_seed).
    :return: None
    """
    # Imported locally: the NumPy aliases in the ``scipy`` namespace that
    # this function used before are deprecated.
    import numpy as np

    suffix = _dla_zcat_suffix(args)

    ### DLA data
    data = _read_dla_catalog(args.in_dir + '/master_DLA.fits', 'Z_DLA_RSD',
                             args)
    print('INFO: {} DLA in mocks data'.format(data['RA'].size))

    ### Save DLA data
    w_DLA = _select_dla_rows(data, args.single_DLA_per_skw)
    N_DLA = np.sum(w_DLA)
    print('INFO: downsampling leaves {} DLAs in catalog'.format(N_DLA))
    _write_zcat(
        args.out_dir + '/zcat_DLA_{}{}.fits'.format(args.downsampling, suffix),
        data, w_DLA, ['PIX', 'Z_QSO'])

    if not args.make_randoms_zcats:
        return

    r_state = np.random.RandomState(args.randoms_downsampling_seed)

    ### Data
    with fitsio.FITS(args.randoms_dir + '/master_randoms.fits') as h:
        mr_data = np.sort(h[1].read(), order=['MOCKID', 'Z'])
    data = {}
    for k in ['RA', 'DEC']:
        data[k] = mr_data[k][:]
    for k in ['THING_ID', 'PLATE', 'MJD', 'FIBERID']:
        data[k] = mr_data['MOCKID'][:]
    data['Z'] = mr_data['Z'][:]
    w = data['Z'] > args.min_cat_z
    for k in data:
        data[k] = data[k][w]
    _add_healpix_pixels(data, args.nside)
    print('INFO: {} QSO in randoms'.format(data['RA'].size))

    ### Get reduced data numbers
    original_nbData = data['RA'].shape[0]
    nbData = round(original_nbData * args.randoms_downsampling)

    ### Save data
    assert nbData <= data['RA'].size
    w = r_state.choice(np.arange(data['RA'].size), size=nbData, replace=False)
    print('INFO: downsampling to {} QSOs in randoms catalog'.format(nbData))
    _write_zcat(
        args.out_dir +
        '/zcat_{}_randoms.fits'.format(args.randoms_downsampling), data, w,
        ['PIX'])

    ### DLA randoms
    data = _read_dla_catalog(args.randoms_dir + '/master_DLA_randoms.fits',
                             'Z_DLA', args)
    print('INFO: {} DLA in randoms'.format(data['RA'].size))

    ### Save DLA data
    w_DLA = _select_dla_rows(data, args.single_DLA_per_skw)

    #Then downsample using a modified ratio to take into account the removal of QSOs.
    mod_r_ds = args.randoms_downsampling / args.downsampling
    w_DLA &= r_state.choice([0, 1],
                            size=data['THING_ID'].shape[0],
                            replace=True,
                            p=[1 - mod_r_ds, mod_r_ds]).astype('bool')

    print('INFO: downsampling leaves {} DLAs in randoms catalog'.format(
        np.sum(w_DLA)))
    _write_zcat(
        args.out_dir + '/zcat_DLA_{}_randoms{}.fits'.format(
            args.randoms_downsampling, suffix), data, w_DLA,
        ['PIX', 'Z_QSO'])

    return