Пример #1
0
def build_rr_kdtree(S, hparams, log=False):
    """Build a k-d tree over a randomly transformed copy of ``S``.

    The data matrix is multiplied by a random square matrix, and the tree is
    then grown breadth-first, splitting each level on column
    ``level % ncols`` of the transformed data.

    Args:
        S: 2-D data matrix; ``S.shape`` is unpacked as ``(nrows, ncols)``.
        hparams: Object exposing ``leaf_size`` (forwarded to ``split_node``).
        log: Flag forwarded to ``rplog`` with every message.

    Returns:
        dict with keys ``'tree'`` (root ``Node``), ``'rotmat'`` (the random
        matrix applied to the data) and ``'ncols'``.
    """

    def logr(message):
        rplog(message, log)

    nrows, ncols = S.shape
    max_leaf = hparams.leaf_size
    logr('Building k-d tree with randomly rotated data '
         'on %i points in %i dims; max. leaf size: %i' %
         (nrows, ncols, max_leaf))

    # NOTE(review): entries are i.i.d. standard normal, so this matrix is not
    # orthonormal -- it is a dense random projection rather than a strict
    # rotation. Confirm this is intended.
    rotmat = np.random.normal(size=[ncols, ncols])
    rotated_S = np.dot(S, rotmat)

    # Breadth-first work queue of (node, level, point indices).
    root = Node(0, 0)
    nidx = 0
    nodes = deque([(root, root.level, range(nrows))])

    while nodes:
        node, level, idxs = nodes.popleft()
        indent = str('|-') * level
        logr('%sLevel %i, node %i, %i points ....' %
             (indent, level, node.idx, len(idxs)))

        # Cycle through the transformed columns, one per tree level.
        split_values = rotated_S[:, level % ncols]

        # split_node returns the updated node counter; presumably it also
        # enqueues any children on `nodes` (defined elsewhere -- confirm).
        nidx = split_node(max_leaf, split_values, idxs, indent, node, nidx,
                          level + 1, nodes, logr)

    return {'tree': root, 'rotmat': rotmat, 'ncols': ncols}
Пример #2
0
def traverse_sparse_rptree(tree, log=False):
    """Dump a sparse RP-tree breadth-first, one log line per node.

    Prints the stored sign vector and padded column count, then walks the
    tree level by level. Leaves report their point indices; internal nodes
    report the columns used for that level's projection and the node's
    ``val`` attribute.

    Args:
        tree: dict as returned by ``build_sparse_rptree``. Reads ``'tree'``,
            ``'D_by_sqrt_d'``, ``'new_ncols'``, ``'level_col_idx'`` and,
            when present, ``'level_rnd_vals'``.
        log: Flag forwarded to ``rplog`` with every message.
    """

    def logr(message):
        rplog(message, log)

    nodes = deque()
    nodes.append(tree['tree'])
    D_by_sqrt_d = np.transpose(tree['D_by_sqrt_d'])
    print('Diagonal sign matrix / sqrt(d):', D_by_sqrt_d)

    print('New column length:', tree['new_ncols'])

    level_col_idx = tree['level_col_idx']
    level_rnd_vals = tree.get('level_rnd_vals', [])
    # The builder only stores per-level random values in the non-sign mode.
    use_sign = len(level_rnd_vals) == 0

    while len(nodes) > 0:
        n = nodes.popleft()
        l = n.level
        indent = str('|-') * l
        ms = ''
        if n.leaf:
            ms = 'pidxs:' + str(n.pidxs)
        else:
            nodes.append(n.lchild)
            nodes.append(n.rchild)
            if use_sign:
                # In sign mode each level holds (column, sign) pairs, where
                # sign 0 means "subtract" and anything else means "add".
                # Unpacking that sequence straight into two names only works
                # when exactly two columns were picked, so split it instead.
                pairs = level_col_idx[l]
                if not isinstance(pairs, (list, tuple)):
                    # A bare zip() is a one-shot iterator; keep a list in the
                    # tree so later nodes on this level (and later users of
                    # the tree) still see the pairs.
                    pairs = list(pairs)
                    level_col_idx[l] = pairs
                pidxs = [cidx for cidx, sv in pairs if sv != 0]
                nidxs = [cidx for cidx, sv in pairs if sv == 0]
                ms = 'Col idxs: (' + str(pidxs) + ' - ' + str(
                    nidxs) + '), val:' + str(n.val)
            else:
                ms = 'Col idxs:' + str(level_col_idx[l]) \
                     + ', hp:' + str(level_rnd_vals[l]) + ', val:' + str(n.val)
        logr('%sL %i: leaf?%i, id:%i --> %s' % (indent, l, n.leaf, n.idx, ms))
Пример #3
0
def traverse_rconv_kdtree(tree, log=False):
    """Dump a random-circular-convolution k-d tree breadth-first.

    Prints the stored convolution and sign vectors, then logs one line per
    node: leaves show their point indices, internal nodes show the split
    column (``level % ncols``) and the node's ``val`` attribute.

    Args:
        tree: dict as returned by ``build_rconv_kdtree``; reads ``'tree'``,
            ``'R'``, ``'D'`` and ``'ncols'``.
        log: Flag forwarded to ``rplog`` with every message.
    """

    def logr(message):
        rplog(message, log)

    pending = deque([tree['tree']])
    print('Random circular convolution vector:', tree['R'])
    print('Random sign vector:', tree['D'])
    ncols = tree['ncols']

    while pending:
        node = pending.popleft()
        depth = node.level
        prefix = str('|-') * depth
        if node.leaf:
            detail = 'pidxs:' + str(node.pidxs)
        else:
            # Left child first, so each level is reported left to right.
            pending.extend((node.lchild, node.rchild))
            detail = ('New col:' + str(depth % ncols) + ', val:' +
                      str(node.val))
        logr('%sL %i: leaf?%i, id:%i --> %s' %
             (prefix, depth, node.leaf, node.idx, detail))
Пример #4
0
def traverse_rr_kdtree(tree, log=False):
    """Dump a randomly-rotated k-d tree breadth-first.

    Prints the transpose of the stored matrix, then logs one line per node:
    leaves show their point indices, internal nodes show the split column
    (``level % ncols``) and the node's ``val`` attribute.

    Args:
        tree: dict as returned by ``build_rr_kdtree``; reads ``'tree'`` and
            ``'rotmat'``.
        log: Flag forwarded to ``rplog`` with every message.
    """

    def logr(message):
        rplog(message, log)

    pending = deque([tree['tree']])

    # The column count is taken from the (transposed) matrix itself.
    rotmat = np.transpose(tree['rotmat'])
    ncols, _ = rotmat.shape
    print('Rotation matrix:')
    print(rotmat)

    while pending:
        node = pending.popleft()
        depth = node.level
        prefix = str('|-') * depth
        if node.leaf:
            detail = 'pidxs:' + str(node.pidxs)
        else:
            pending.extend((node.lchild, node.rchild))
            detail = ('New col:' + str(depth % ncols) + ', val:' +
                      str(node.val))
        logr('%sL %i: leaf?%i, id:%i --> %s' %
             (prefix, depth, node.leaf, node.idx, detail))
Пример #5
0
def build_rconv_kdtree(S, hparams, log=False):
    """Build a k-d tree over randomly circular-convolved data.

    Every row of ``S`` is passed through ``CC_x`` together with a random
    sign vector and the FFT of a random Gaussian vector. The tree is then
    grown breadth-first, splitting each level on column ``level % ncols``
    of the transformed data.

    Args:
        S: 2-D data matrix; ``S.shape`` is unpacked as ``(nrows, ncols)``.
        hparams: Object exposing ``leaf_size`` (forwarded to ``split_node``).
        log: Flag forwarded to ``rplog`` with every message.

    Returns:
        dict with keys ``'tree'`` (root ``Node``), ``'R'``, ``'fft_R'``,
        ``'D'`` and ``'ncols'``.
    """

    def logr(message):
        rplog(message, log)

    nrows, ncols = S.shape
    max_leaf = hparams.leaf_size
    logr('Building k-d tree with randomly circular convolved data '
         'on %i points in %i dims; max. leaf size: %i' %
         (nrows, ncols, max_leaf))

    # Random vector for the circular convolution; its FFT is computed once.
    # TODO: Add padding
    R = np.random.normal(size=ncols)
    fft_R = fft(R)
    # Random sign vector: {0, 1} draws mapped to {-1, +1}.
    D = np.random.binomial(n=1, p=0.5, size=ncols) * 2 - 1
    # Transform the data one row at a time.
    CC_S = np.array([CC_x(D, fft_R, row) for row in S])

    # The transform is expected to keep the data shape.
    out_rows, out_cols = CC_S.shape
    assert out_rows == nrows and out_cols == ncols

    # Breadth-first work queue of (node, level, point indices).
    root = Node(0, 0)
    nidx = 0
    nodes = deque([(root, root.level, range(nrows))])

    while nodes:
        node, level, idxs = nodes.popleft()
        indent = str('|-') * level
        logr('%sLevel %i, node %i, %i points ....' %
             (indent, level, node.idx, len(idxs)))

        # Cycle through the transformed columns, one per tree level.
        split_values = CC_S[:, level % ncols]

        # split_node returns the updated node counter; presumably it also
        # enqueues any children on `nodes` (defined elsewhere -- confirm).
        nidx = split_node(max_leaf, split_values, idxs, indent, node, nidx,
                          level + 1, nodes, logr)

    return {'tree': root, 'R': R, 'fft_R': fft_R, 'D': D, 'ncols': ncols}
Пример #6
0
def traverse_ff_kdtree(tree, log=False):
    """Dump a FastFood-preconditioned k-d tree breadth-first.

    Prints the stored transform parameters and the dimensionality change,
    then logs one line per node: leaves show their point indices, internal
    nodes show the split column (``level % new_ncols``) and the node's
    ``val`` attribute.

    Args:
        tree: dict as returned by ``build_ff_kdtree``; reads ``'tree'``,
            ``'G'``, ``'P_seed'``, ``'ncols'``, ``'new_ncols'`` and either
            ``'D'`` or ``'D_by_sqrt_d'``.
        log: Flag forwarded to ``rplog`` with every message.
    """

    def logr(message):
        rplog(message, log)

    nodes = deque()
    nodes.append(tree['tree'])
    # build_ff_kdtree stores only the pre-scaled sign vector under
    # 'D_by_sqrt_d' and no raw 'D', so an unconditional tree['D'] raises
    # KeyError on the trees it produces. Prefer 'D' when a caller supplies
    # it, otherwise show the scaled vector under its own key name.
    if 'D' in tree:
        print('D:', tree['D'])
    else:
        print('D_by_sqrt_d:', tree['D_by_sqrt_d'])
    print('G:', tree['G'])
    print('P_seed:', tree['P_seed'])
    ncols = tree['ncols']
    new_ncols = tree['new_ncols']

    print('Data dimensionality %i --> %i' % (ncols, new_ncols))

    while len(nodes) > 0:
        n = nodes.popleft()
        l = n.level
        indent = str('|-') * l
        ms = ''
        if n.leaf:
            ms = 'pidxs:' + str(n.pidxs)
        else:
            nodes.append(n.lchild)
            nodes.append(n.rchild)
            ms = 'New col:' + str(l % new_ncols) + ', val:' + str(n.val)
        logr('%sL %i: leaf?%i, id:%i --> %s' % (indent, l, n.leaf, n.idx, ms))
Пример #7
0
def build_sparse_rptree(S, hparams, log=False):
    """Build a sparse random-projection tree over densified data.

    Each row of ``S`` is first passed through ``HD_x`` together with a
    scaled random sign vector and a zero pad that brings the width up to a
    power of two. The tree is then grown breadth-first. All nodes on a level
    share one sparse random direction: a random subset of columns combined
    either with +/-1 signs (``hparams.use_sign``) or with Gaussian weights.

    Args:
        S: 2-D data matrix; ``S.shape`` is unpacked as ``(nrows, ncols)``.
        hparams: Object exposing ``leaf_size``, ``col_prob`` (Bernoulli
            probability of picking each column) and ``use_sign``.
        log: Flag forwarded to ``rplog`` with every message.

    Returns:
        dict with keys ``'tree'`` (root ``Node``), ``'pad'``,
        ``'D_by_sqrt_d'``, ``'new_ncols'`` and ``'level_col_idx'``. In sign
        mode ``'level_col_idx'`` holds, per level, a list of
        ``(column, sign)`` pairs with sign in {0, 1}. Otherwise it holds the
        list of chosen columns, and ``'level_rnd_vals'`` is added with the
        matching Gaussian weights.

    Raises:
        Exception: if no column is selected for a level after 10 draws.
    """

    def logr(message):
        rplog(message, log)

    nrows, ncols = S.shape
    leaf_size = hparams.leaf_size
    logr(
        'Building sparse RP-tree on %i points in %i dims;\nmax. leaf size: %i'
        ', column choice Bernoulli probability %g, \nuse sign random variables %s'
        % (nrows, ncols, leaf_size, hparams.col_prob, str(hparams.use_sign)))

    # Random diagonal sign matrix, stored as a vector of -1.0 / +1.0.
    D = np.random.binomial(n=1, p=0.5, size=ncols).astype(float) * 2.0 - 1.0

    # Pad each point up to the next power-of-two width.
    lncols = np.log2(ncols)
    new_ncols = ncols if int(lncols) == lncols else np.power(
        2,
        int(lncols) + 1)
    logr('Padding %i features to %i with 0' % (ncols, new_ncols))
    pad_vec = np.zeros(new_ncols - ncols)

    # Fold the 1/sqrt(d) scaling into the sign vector once.
    D_by_sqrt_d = D / np.sqrt(float(new_ncols))

    HD_S = np.array([HD_x(D_by_sqrt_d, pad_vec, p) for p in S])

    a, b = HD_S.shape
    assert a == nrows and b == new_ncols, (
        'a = %i, nrows = %i, b = %i, new_ncols = %i, ncols = %i' %
        (a, nrows, b, new_ncols, ncols))

    logr('Densified data matrix has shape %s previously %s' %
         (repr(HD_S.shape), repr(S.shape)))

    # Breadth-first work queue of (node, level, point indices).
    nodes = deque()
    nidx = 0
    root = Node(0, 0)
    nodes.append((root, root.level, range(nrows)))

    level_col_idx = []
    level_rnd_vals = []
    all_projs_level = None

    while len(nodes) > 0:
        n, l, idxs = nodes.popleft()
        indent = str('|-') * l
        logr('%sLevel %i, node %i, %i points ....' %
             (indent, l, n.idx, len(idxs)))
        if l == len(level_col_idx):
            # First node seen on this level: draw the level's sparse
            # direction and project every point onto it once. Later nodes
            # on the same level reuse `all_projs_level`.
            tries = 0
            cidxs = []
            while tries < 10 and len(cidxs) == 0:
                tries += 1
                cidx_indicators = np.random.binomial(n=1,
                                                     p=hparams.col_prob,
                                                     size=ncols)
                # FIXME:
                # NOTE(review): indicators span the original `ncols`, so the
                # padded columns [ncols, new_ncols) of HD_S can never be
                # picked; possibly what this FIXME is about -- confirm.
                cidxs = [
                    idx for idx, flag in enumerate(cidx_indicators)
                    if flag == 1
                ]

            if len(cidxs) == 0:
                raise Exception(
                    'No column got selected for projection (after 10 tries)')

            if hparams.use_sign:
                sign_vals = np.random.binomial(n=1, p=0.5, size=len(cidxs))
                # Materialise the pairs: on Python 3 a bare zip() is a
                # one-shot iterator, so storing it in the returned tree
                # would let the pairs be read only once.
                all_idxs = list(zip(cidxs, sign_vals.astype(int)))
                # Sign 0 -> subtract the column, sign 1 -> add it.
                poss = [cidx for cidx, sv in all_idxs if sv != 0]
                negs = [cidx for cidx, sv in all_idxs if sv == 0]
                all_projs_level = (np.sum(HD_S[:, poss], axis=1) -
                                   np.sum(HD_S[:, negs], axis=1))
                level_col_idx.append(all_idxs)
            else:
                hp = np.random.normal(size=len(cidxs))
                all_projs_level = np.dot(HD_S[:, cidxs], hp)
                level_rnd_vals.append(hp)
                level_col_idx.append(cidxs)

        # split_node returns the updated node counter; presumably it also
        # enqueues any children on `nodes` (defined elsewhere -- confirm).
        nidx = split_node(hparams.leaf_size, all_projs_level, idxs, indent, n,
                          nidx, l + 1, nodes, logr)

    common_dict = {
        'tree': root,
        'pad': pad_vec,
        'D_by_sqrt_d': D_by_sqrt_d,
        'new_ncols': new_ncols,
        'level_col_idx': level_col_idx
    }

    if hparams.use_sign:
        assert len(level_rnd_vals) == 0
    else:
        common_dict['level_rnd_vals'] = level_rnd_vals

    return common_dict
Пример #8
0
def build_ff_kdtree(S, hparams, log=False):
    """Build a k-d tree over FastFood-preconditioned data.

    Every row of ``S`` is passed through ``HGPHD_x`` together with a scaled
    random sign vector, a zero pad that brings the width up to a power of
    two, a permutation seed and a Gaussian vector. The tree is then grown
    breadth-first, splitting each level on column ``level % new_ncols`` of
    the transformed data.

    Args:
        S: 2-D data matrix; ``S.shape`` is unpacked as ``(nrows, ncols)``.
        hparams: Object exposing ``leaf_size`` (forwarded to ``split_node``).
        log: Flag forwarded to ``rplog`` with every message.

    Returns:
        dict with keys ``'tree'`` (root ``Node``), ``'ncols'``,
        ``'new_ncols'``, ``'D_by_sqrt_d'``, ``'pad'``, ``'P_seed'`` and
        ``'G'``.
    """

    def logr(message):
        rplog(message, log)

    nrows, ncols = S.shape
    max_leaf = hparams.leaf_size
    logr('Building k-d tree with data pre-conditioned with FastFood '
         'on %i points in %i dims; max. leaf size: %i' %
         (nrows, ncols, max_leaf))

    # Random diagonal sign matrix D, stored as a vector of -1.0 / +1.0.
    D = np.random.binomial(n=1, p=0.5, size=ncols).astype(float) * 2.0 - 1.0

    # Pad each point up to the next power-of-two width.
    log2_ncols = np.log2(ncols)
    if int(log2_ncols) == log2_ncols:
        new_ncols = ncols
    else:
        new_ncols = np.power(2, int(log2_ncols) + 1)
    logr('Padding %i features to %i with 0' % (ncols, new_ncols))
    pad_vec = np.zeros(new_ncols - ncols)

    # Fold the scaling into the sign vector once. Despite the variable and
    # key name, the divisor here is d (new_ncols), not sqrt(d).
    D_by_sqrt_d = D / float(new_ncols)

    # Seed standing in for the random permutation matrix P.
    P_seed = np.random.randint(9999)
    # Random diagonal Gaussian matrix G, stored as a vector.
    G = np.random.normal(size=new_ncols)

    HGPHD_S = np.array(
        [HGPHD_x(D_by_sqrt_d, pad_vec, P_seed, G, row) for row in S])

    logr('FastFood-ed data matrix has shape %s previously %s' %
         (repr(HGPHD_S.shape), repr(S.shape)))

    out_rows, out_cols = HGPHD_S.shape
    assert out_rows == nrows and out_cols == new_ncols

    # Breadth-first work queue of (node, level, point indices).
    root = Node(0, 0)
    nidx = 0
    nodes = deque([(root, root.level, range(nrows))])

    while nodes:
        node, level, idxs = nodes.popleft()
        indent = str('|-') * level
        logr('%sLevel %i, node %i, %i points ....' %
             (indent, level, node.idx, len(idxs)))

        # Cycle through the transformed columns, one per tree level.
        split_values = HGPHD_S[:, level % new_ncols]

        # split_node returns the updated node counter; presumably it also
        # enqueues any children on `nodes` (defined elsewhere -- confirm).
        nidx = split_node(max_leaf, split_values, idxs, indent, node, nidx,
                          level + 1, nodes, logr)

    return {
        'tree': root,
        'ncols': ncols,
        'new_ncols': new_ncols,
        'D_by_sqrt_d': D_by_sqrt_d,
        'pad': pad_vec,
        'P_seed': P_seed,
        'G': G
    }