def build_rr_kdtree(S, hparams, log=False):
    """Build a k-d tree over a randomly transformed copy of ``S``.

    A square Gaussian matrix is drawn, ``S`` is multiplied by it, and the
    tree is grown breadth-first: a node at level ``l`` is handed to
    ``split_node`` together with column ``l % ncols`` of the transformed data.

    Args:
        S: 2-D array with one point per row.
        hparams: object exposing ``leaf_size`` (forwarded to ``split_node``).
        log: flag forwarded to ``rplog`` with every progress message.

    Returns:
        dict with keys ``'tree'`` (the root ``Node``), ``'rotmat'`` (the
        matrix that was applied to ``S``) and ``'ncols'`` (columns of ``S``).
    """
    def logr(message):
        rplog(message, log)

    num_points, num_dims = S.shape
    max_leaf = hparams.leaf_size
    logr('Building k-d tree with randomly rotated data '
         'on %i points in %i dims; max. leaf size: %i'
         % (num_points, num_dims, max_leaf))

    # NOTE(review): the entries are i.i.d. normal draws and nothing below
    # orthonormalizes the matrix, so this is not an exact rotation.
    rotmat = np.random.normal(size=[num_dims, num_dims])
    transformed = np.dot(S, rotmat)

    root = Node(0, 0)
    pending = deque()
    pending.append((root, root.level, range(num_points)))
    next_idx = 0

    # split_node receives the queue and the running node counter; it returns
    # the updated counter.
    while pending:
        node, depth, point_idxs = pending.popleft()
        prefix = '|-' * depth
        logr('%sLevel %i, node %i, %i points ....'
             % (prefix, depth, node.idx, len(point_idxs)))
        # Cycle through the columns, one per tree level.
        split_col = depth % num_dims
        next_idx = split_node(max_leaf, transformed[:, split_col],
                              point_idxs, prefix, node, next_idx,
                              depth + 1, pending, logr)

    return {'tree': root, 'rotmat': rotmat, 'ncols': num_dims}
def traverse_sparse_rptree(tree, log=False):
    """Print a breadth-first dump of a tree built by ``build_sparse_rptree``.

    The scaled sign vector and the padded column count are printed first.
    Every node is then reported through ``rplog``: leaves show their point
    indices, internal nodes show the columns used at their level and the
    node's ``val``.

    Args:
        tree: dict holding ``'tree'`` (root node), ``'D_by_sqrt_d'``,
            ``'new_ncols'``, ``'level_col_idx'`` and, when Gaussian
            projections were used, ``'level_rnd_vals'``.
        log: flag forwarded to ``rplog`` with every message.
    """
    def logr(message):
        rplog(message, log)

    nodes = deque()
    nodes.append(tree['tree'])
    D_by_sqrt_d = np.transpose(tree['D_by_sqrt_d'])
    print('Diagonal sign matrix / sqrt(d):', D_by_sqrt_d)
    print('New column length:', tree['new_ncols'])
    level_col_idx = tree['level_col_idx']
    level_rnd_vals = tree['level_rnd_vals'] if 'level_rnd_vals' in tree else []
    # The builder only stores 'level_rnd_vals' for Gaussian projections.
    use_sign = len(level_rnd_vals) == 0

    # build_sparse_rptree stores, per level, an iterable of (column, sign)
    # pairs where sign 0 means "subtract" and anything else means "add".
    # Decode each level once and cache the result: the stored iterable may be
    # single-use, and several nodes share the same level.
    signed_cols_by_level = {}

    def signed_columns(level):
        if level not in signed_cols_by_level:
            pos_cols, neg_cols = [], []
            for cidx, sign in level_col_idx[level]:
                if sign == 0:
                    neg_cols.append(cidx)
                else:
                    pos_cols.append(cidx)
            signed_cols_by_level[level] = (pos_cols, neg_cols)
        return signed_cols_by_level[level]

    while nodes:
        n = nodes.popleft()
        l = n.level
        indent = '|-' * l
        if n.leaf:
            ms = 'pidxs:' + str(n.pidxs)
        else:
            nodes.append(n.lchild)
            nodes.append(n.rchild)
            if use_sign:
                pidxs, nidxs = signed_columns(n.level)
                ms = ('Col idxs: (' + str(pidxs) + ' - ' + str(nidxs)
                      + '), val:' + str(n.val))
            else:
                ms = ('Col idxs:' + str(level_col_idx[n.level])
                      + ', hp:' + str(level_rnd_vals[n.level])
                      + ', val:' + str(n.val))
        logr('%sL %i: leaf?%i, id:%i --> %s' % (indent, l, n.leaf, n.idx, ms))
def traverse_rconv_kdtree(tree, log=False):
    """Print a breadth-first dump of a tree built by ``build_rconv_kdtree``.

    The stored convolution vector and sign vector are printed first. Every
    node is then reported through ``rplog``: leaves show their point indices,
    internal nodes show the column ``level % ncols`` and the node's ``val``.

    Args:
        tree: dict holding ``'tree'`` (root node), ``'R'``, ``'D'`` and
            ``'ncols'``.
        log: flag forwarded to ``rplog`` with every message.
    """
    def logr(message):
        rplog(message, log)

    queue = deque()
    queue.append(tree['tree'])
    print('Random circular convolution vector:', tree['R'])
    print('Random sign vector:', tree['D'])
    num_dims = tree['ncols']

    while queue:
        node = queue.popleft()
        depth = node.level
        prefix = '|-' * depth
        if node.leaf:
            summary = 'pidxs:' + str(node.pidxs)
        else:
            # Visit the left child before the right one.
            queue.extend((node.lchild, node.rchild))
            summary = ('New col:' + str(node.level % num_dims)
                       + ', val:' + str(node.val))
        logr('%sL %i: leaf?%i, id:%i --> %s'
             % (prefix, depth, node.leaf, node.idx, summary))
def traverse_rr_kdtree(tree, log=False):
    """Print a breadth-first dump of a tree built by ``build_rr_kdtree``.

    The transpose of the stored matrix is printed first. Every node is then
    reported through ``rplog``: leaves show their point indices, internal
    nodes show the column ``level % ncols`` and the node's ``val``.

    Args:
        tree: dict holding ``'tree'`` (root node) and ``'rotmat'`` (2-D).
        log: flag forwarded to ``rplog`` with every message.
    """
    def logr(message):
        rplog(message, log)

    queue = deque()
    queue.append(tree['tree'])
    rotmat_t = np.transpose(tree['rotmat'])
    # The column count is taken from the transposed matrix's first axis.
    num_dims, _unused = rotmat_t.shape
    print('Rotation matrix:')
    print(rotmat_t)

    while queue:
        node = queue.popleft()
        depth = node.level
        prefix = '|-' * depth
        if node.leaf:
            summary = 'pidxs:' + str(node.pidxs)
        else:
            # Visit the left child before the right one.
            queue.extend((node.lchild, node.rchild))
            summary = ('New col:' + str(node.level % num_dims)
                       + ', val:' + str(node.val))
        logr('%sL %i: leaf?%i, id:%i --> %s'
             % (prefix, depth, node.leaf, node.idx, summary))
def build_rconv_kdtree(S, hparams, log=False):
    """Build a k-d tree over data pre-processed with ``CC_x``.

    A Gaussian vector ``R`` (and its FFT) and a +/-1 sign vector ``D`` are
    drawn; every row of ``S`` is passed through ``CC_x(D, fft_R, row)``. The
    tree is grown breadth-first: a node at level ``l`` is handed to
    ``split_node`` together with column ``l % ncols`` of the processed data.

    Args:
        S: 2-D array with one point per row.
        hparams: object exposing ``leaf_size`` (forwarded to ``split_node``).
        log: flag forwarded to ``rplog`` with every progress message.

    Returns:
        dict with keys ``'tree'`` (root ``Node``), ``'R'``, ``'fft_R'``,
        ``'D'`` and ``'ncols'``.
    """
    def logr(message):
        rplog(message, log)

    num_points, num_dims = S.shape
    max_leaf = hparams.leaf_size
    logr('Building k-d tree with randomly circular convolved data '
         'on %i points in %i dims; max. leaf size: %i'
         % (num_points, num_dims, max_leaf))

    # Random vector used for the circular convolution.
    # TODO: Add padding
    R = np.random.normal(size=num_dims)
    fft_R = fft(R)
    # Map Bernoulli(0.5) draws {0, 1} onto signs {-1, +1}.
    D = np.random.binomial(n=1, p=0.5, size=num_dims) * 2 - 1

    convolved = np.array([CC_x(D, fft_R, point) for point in S])
    out_rows, out_cols = convolved.shape
    # The processed data must keep the shape of the input.
    assert out_rows == num_points and out_cols == num_dims

    root = Node(0, 0)
    pending = deque()
    pending.append((root, root.level, range(num_points)))
    next_idx = 0

    while pending:
        node, depth, point_idxs = pending.popleft()
        prefix = '|-' * depth
        logr('%sLevel %i, node %i, %i points ....'
             % (prefix, depth, node.idx, len(point_idxs)))
        # Cycle through the columns, one per tree level.
        split_col = depth % num_dims
        next_idx = split_node(max_leaf, convolved[:, split_col],
                              point_idxs, prefix, node, next_idx,
                              depth + 1, pending, logr)

    return {'tree': root, 'R': R, 'fft_R': fft_R, 'D': D, 'ncols': num_dims}
def traverse_ff_kdtree(tree, log=False):
    """Print a breadth-first dump of a tree built by ``build_ff_kdtree``.

    The stored sign vector, ``G`` and ``P_seed`` are printed first, followed
    by the original and padded dimensionality. Every node is then reported
    through ``rplog``: leaves show their point indices, internal nodes show
    the column ``level % new_ncols`` and the node's ``val``.

    Args:
        tree: dict holding ``'tree'`` (root node), ``'G'``, ``'P_seed'``,
            ``'ncols'``, ``'new_ncols'`` and either ``'D'`` or
            ``'D_by_sqrt_d'``.
        log: flag forwarded to ``rplog`` with every message.
    """
    def logr(message):
        rplog(message, log)

    nodes = deque()
    nodes.append(tree['tree'])
    # build_ff_kdtree only stores the pre-scaled sign vector under
    # 'D_by_sqrt_d'; reading 'D' unconditionally raised KeyError on its output.
    if 'D' in tree:
        print('D:', tree['D'])
    else:
        print('D_by_sqrt_d:', tree['D_by_sqrt_d'])
    print('G:', tree['G'])
    print('P_seed:', tree['P_seed'])
    ncols = tree['ncols']
    new_ncols = tree['new_ncols']
    print('Data dimensionality %i --> %i' % (ncols, new_ncols))

    while nodes:
        n = nodes.popleft()
        l = n.level
        indent = '|-' * l
        if n.leaf:
            ms = 'pidxs:' + str(n.pidxs)
        else:
            nodes.append(n.lchild)
            nodes.append(n.rchild)
            ms = 'New col:' + str(n.level % new_ncols) + ', val:' + str(n.val)
        logr('%sL %i: leaf?%i, id:%i --> %s' % (indent, l, n.leaf, n.idx, ms))
def build_sparse_rptree(S, hparams, log=False):
    """Build a sparse random-projection tree over the rows of ``S``.

    Each row is zero-padded to a power-of-two length and passed through
    ``HD_x`` with a random sign vector scaled by ``1 / sqrt(new_ncols)``.
    The tree is grown breadth-first. The first time a level is reached, a
    random subset of columns is drawn (each kept with probability
    ``hparams.col_prob``) and all points are projected onto them, either with
    random +/- signs (``hparams.use_sign``) or with Gaussian weights. Every
    node of that level is then split by ``split_node`` on that projection.

    Args:
        S: 2-D array with one point per row.
        hparams: object exposing ``leaf_size``, ``col_prob`` and ``use_sign``.
        log: flag forwarded to ``rplog`` with every progress message.

    Returns:
        dict with keys ``'tree'`` (root ``Node``), ``'pad'``,
        ``'D_by_sqrt_d'``, ``'new_ncols'`` and ``'level_col_idx'``.
        With ``use_sign`` each ``level_col_idx`` entry is a list of
        ``(column, sign)`` pairs, where sign 0 means the column is subtracted
        and 1 means it is added. Otherwise each entry is a list of column
        indices and the dict also carries ``'level_rnd_vals'`` with the
        matching Gaussian weights.

    Raises:
        Exception: if no column is selected for a level after 10 draws.
    """
    def logr(message):
        rplog(message, log)

    nrows, ncols = S.shape
    leaf_size = hparams.leaf_size
    logr(
        'Building sparse RP-tree on %i points in %i dims;\nmax. leaf size: %i'
        ', column choice Bernoulli probability %g, \nuse sign random variables %s'
        % (nrows, ncols, leaf_size, hparams.col_prob, str(hparams.use_sign)))

    # Random diagonal sign matrix, stored as a vector of -1.0 / +1.0.
    D = np.random.binomial(n=1, p=0.5, size=ncols).astype(float) * 2.0 - 1.0

    # Pad each point up to the next power-of-two length.
    lncols = np.log2(ncols)
    if int(lncols) == lncols:
        new_ncols = ncols
    else:
        new_ncols = np.power(2, int(lncols) + 1)
    logr('Padding %i features to %i with 0' % (ncols, new_ncols))
    pad_vec = np.zeros(new_ncols - ncols)

    # Cache the 1/sqrt(d) scaling inside the sign vector.
    D_by_sqrt_d = D / np.sqrt(float(new_ncols))
    HD_S = np.array([HD_x(D_by_sqrt_d, pad_vec, p) for p in S])
    a, b = HD_S.shape
    assert a == nrows and b == new_ncols, (
        'a = %i, nrows = %i, b = %i, new_ncols = %i, ncols = %i'
        % (a, nrows, b, new_ncols, ncols))
    logr('Densified data matrix has shape %s previously %s'
         % (repr(HD_S.shape), repr(S.shape)))

    nodes = deque()
    nidx = 0
    root = Node(0, 0)
    nodes.append((root, root.level, range(nrows)))
    level_col_idx = []
    level_rnd_vals = []
    all_projs_level = None

    while nodes:
        n, l, idxs = nodes.popleft()
        indent = '|-' * l
        logr('%sLevel %i, node %i, %i points ....'
             % (indent, l, n.idx, len(idxs)))

        if l == len(level_col_idx):
            # This level has no projection yet:
            # 1. choose the column indices for this level,
            # 2. draw a random value per chosen column (a sign or a
            #    normal weight),
            # 3. project all points along the resulting random vector.
            # NOTE(review): columns are drawn from the first `ncols` indices
            # only, although HD_S has `new_ncols` columns -- confirm this is
            # intended (the original carried an empty FIXME here).
            cidxs = []
            for _ in range(10):
                cidx_indicators = np.random.binomial(
                    n=1, p=hparams.col_prob, size=ncols)
                cidxs = [
                    idx for idx, b in enumerate(cidx_indicators) if b == 1
                ]
                if cidxs:
                    break
            if not cidxs:
                raise Exception(
                    'No column got selected for projection (after 10 tries)')

            if hparams.use_sign:
                sign_vals = np.random.binomial(n=1, p=0.5, size=len(cidxs))
                # Materialize the pairs: on Python 3 a bare zip() is a
                # single-use iterator, so the stored entry would read as
                # empty after its first consumer.
                all_idxs = list(zip(cidxs, sign_vals.astype(int)))
                poss = [cidx for cidx, sv in all_idxs if sv != 0]
                negs = [cidx for cidx, sv in all_idxs if sv == 0]
                all_projs_level = (np.sum(HD_S[:, poss], axis=1)
                                   - np.sum(HD_S[:, negs], axis=1))
                level_col_idx.append(all_idxs)
            else:
                hp = np.random.normal(size=len(cidxs))
                all_projs_level = np.dot(HD_S[:, cidxs], hp)
                level_rnd_vals.append(hp)
                level_col_idx.append(cidxs)

        # Nodes are processed level by level, so all_projs_level always
        # belongs to the current node's level.
        nidx = split_node(hparams.leaf_size, all_projs_level, idxs, indent,
                          n, nidx, l + 1, nodes, logr)

    common_dict = {
        'tree': root,
        'pad': pad_vec,
        'D_by_sqrt_d': D_by_sqrt_d,
        'new_ncols': new_ncols,
        'level_col_idx': level_col_idx
    }
    if hparams.use_sign:
        assert len(level_rnd_vals) == 0
    else:
        common_dict['level_rnd_vals'] = level_rnd_vals
    return common_dict
def build_ff_kdtree(S, hparams, log=False):
    """Build a k-d tree over data pre-processed with ``HGPHD_x``.

    Each row of ``S`` is passed through
    ``HGPHD_x(D_by_sqrt_d, pad_vec, P_seed, G, row)`` using a random sign
    vector, a zero pad up to a power-of-two length, a random integer seed and
    a Gaussian vector. The tree is grown breadth-first: a node at level ``l``
    is handed to ``split_node`` together with column ``l % new_ncols`` of the
    processed data.

    Args:
        S: 2-D array with one point per row.
        hparams: object exposing ``leaf_size`` (forwarded to ``split_node``).
        log: flag forwarded to ``rplog`` with every progress message.

    Returns:
        dict with keys ``'tree'`` (root ``Node``), ``'ncols'``,
        ``'new_ncols'``, ``'D_by_sqrt_d'``, ``'pad'``, ``'P_seed'`` and
        ``'G'``. Despite its name, ``'D_by_sqrt_d'`` holds the sign vector
        divided by ``new_ncols``.
    """
    def logr(message):
        rplog(message, log)

    num_points, num_dims = S.shape
    max_leaf = hparams.leaf_size
    logr('Building k-d tree with data pre-conditioned with FastFood '
         'on %i points in %i dims; max. leaf size: %i'
         % (num_points, num_dims, max_leaf))

    # Random diagonal sign matrix D, kept as a vector of -1.0 / +1.0.
    signs = np.random.binomial(n=1, p=0.5, size=num_dims).astype(float)
    signs = signs * 2.0 - 1.0

    # Pad each point up to the next power-of-two length.
    log_dims = np.log2(num_dims)
    if int(log_dims) == log_dims:
        padded_dims = num_dims
    else:
        padded_dims = np.power(2, int(log_dims) + 1)
    logr('Padding %i features to %i with 0' % (num_dims, padded_dims))
    pad_vec = np.zeros(padded_dims - num_dims)

    # Fold the (1/d) scaling into the sign vector.
    scaled_signs = signs / float(padded_dims)
    # Seed handed to HGPHD_x for its permutation step.
    perm_seed = np.random.randint(9999)
    # Random diagonal Gaussian matrix G, kept as a vector.
    gauss = np.random.normal(size=padded_dims)

    rows = [HGPHD_x(scaled_signs, pad_vec, perm_seed, gauss, point)
            for point in S]
    transformed = np.array(rows)
    logr('FastFood-ed data matrix has shape %s previously %s'
         % (repr(transformed.shape), repr(S.shape)))
    out_rows, out_cols = transformed.shape
    assert out_rows == num_points and out_cols == padded_dims

    root = Node(0, 0)
    pending = deque()
    pending.append((root, root.level, range(num_points)))
    next_idx = 0

    while pending:
        node, depth, point_idxs = pending.popleft()
        prefix = '|-' * depth
        logr('%sLevel %i, node %i, %i points ....'
             % (prefix, depth, node.idx, len(point_idxs)))
        # Cycle through the padded columns, one per tree level.
        split_col = depth % padded_dims
        next_idx = split_node(max_leaf, transformed[:, split_col],
                              point_idxs, prefix, node, next_idx,
                              depth + 1, pending, logr)

    return {
        'tree': root,
        'ncols': num_dims,
        'new_ncols': padded_dims,
        'D_by_sqrt_d': scaled_signs,
        'pad': pad_vec,
        'P_seed': perm_seed,
        'G': gauss
    }