# Imports assumed by the variants below; the module names follow the
# anchor-word-recovery codebase layout and may need adjusting locally.
import numpy as np
from numpy.random import RandomState
import random_projection as rp
import gram_schmidt_stable as gs


# Variant 1: dict-style params, eps floor on row sums, optional candidate set.
def findAnchors(Q, K, params, candidates=None):
    eps = params['eps']

    # Random number generator for generating dimension reduction
    if params['seed'] > 0:
        prng_W = RandomState(params['seed'])
    else:
        prng_W = RandomState(None)
    #checkpoint_prefix = params['checkpoint_prefix']
    new_dim = params['new_dim']

    # default to considering every row of Q as a candidate anchor
    if candidates is None:
        candidates = np.arange(Q.shape[0])

    # row normalize Q
    row_sums = Q.sum(1)
    row_sums[row_sums < eps] = eps
    for i in xrange(len(Q[:, 0])):
        Q[i, :] = Q[i, :] / float(row_sums[i])

    # Reduced dimension random projection method for recovering anchor words
    Q_red = rp.Random_Projection(Q.T, new_dim, prng_W)
    Q_red = Q_red.T
    (anchors, anchor_indices) = gs.Projection_Find(Q_red, K, candidates)

    # restore the original Q
    for i in xrange(len(Q[:, 0])):
        Q[i, :] = Q[i, :] * float(row_sums[i])

    return anchor_indices
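# Hedged illustration (not from the source): the dict-style params this variant
# expects. The keys come from the function body; the values are examples only.
example_params = {
    'eps': 1e-7,      # floor applied to row sums before normalizing
    'seed': 42,       # > 0 seeds the projection RNG, otherwise an unseeded RNG is used
    'new_dim': 1000,  # target dimension for the random projection
}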
# Variant 2: explicit arguments instead of a params object; no eps floor on row sums.
def find_anchors(Q, K, candidates, dim, seed):
    # Random number generator for generating dimension reduction
    prng_W = np.random.RandomState(seed)

    # row normalize Q
    row_sums = Q.sum(1)
    for i in range(len(Q[:, 0])):
        Q[i, :] = Q[i, :] / float(row_sums[i])

    # Reduced dimension random projection method for recovering anchor words
    Q_red = rp.Random_Projection(Q.T, dim, prng_W)
    Q_red = Q_red.T
    (anchors, anchor_indices) = gs.Projection_Find(Q_red, K, candidates)

    # restore the original Q
    for i in range(len(Q[:, 0])):
        Q[i, :] = Q[i, :] * float(row_sums[i])

    return anchor_indices
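# Hedged usage sketch (an assumption, not from the source): calling the variant
# above on a small random stand-in for the word cooccurrence matrix Q. The V x V
# shape convention and the 100-dimensional projection are illustrative only.
if __name__ == '__main__':
    V, K = 500, 20                 # vocabulary size, number of topics
    Q_demo = np.random.rand(V, V)  # stand-in for the word cooccurrence matrix
    anchor_indices = find_anchors(Q_demo, K, np.arange(V), dim=100, seed=0)
    print(anchor_indices)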
# Variant 3: attribute-style params; always projects down to params.new_dim.
def findAnchors(Q, K, params, candidates):
    # Random number generator for generating dimension reduction
    prng_W = RandomState(params.seed)

    checkpoint_prefix = params.checkpoint_prefix
    new_dim = params.new_dim

    # row normalize Q
    row_sums = Q.sum(1)
    for i in xrange(len(Q[:, 0])):
        Q[i, :] = Q[i, :] / float(row_sums[i])

    # Reduced dimension random projection method for recovering anchor words
    Q_red = rp.Random_Projection(Q.T, new_dim, prng_W)
    Q_red = Q_red.T
    (anchors, anchor_indices) = gs.Projection_Find(Q_red, K, candidates)

    # restore the original Q
    for i in xrange(len(Q[:, 0])):
        Q[i, :] = Q[i, :] * float(row_sums[i])

    return anchor_indices
# Variant 4: guards against zero row sums and skips the projection when
# params.lowerDim is unset or at least as large as the ambient dimension.
def findAnchors(Q, K, params, candidates):
    # row normalize Q
    row_sums = Q.sum(axis=1)
    for i in xrange(len(Q[:, 0])):
        Q[i, :] = Q[i, :] / float(row_sums[i] + 1e-100)

    # Reduced dimension random projection method for recovering anchor words
    if params.lowerDim is None or params.lowerDim >= Q.shape[1]:
        Q_red = Q.copy()
    else:
        # Random number generator for generating dimension reduction
        prng_W = RandomState(params.seed)
        Q_red = rp.Random_Projection(Q.T, params.lowerDim, prng_W)
        Q_red = Q_red.T
    (anchors, anchor_indices) = gs.Projection_Find(Q_red, K, candidates)

    # restore the original Q
    for i in xrange(len(Q[:, 0])):
        Q[i, :] = Q[i, :] * float(row_sums[i])

    return anchor_indices
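# Hedged illustration (an assumption, not from the source): the variant above reads
# attribute-style params, e.g. an argparse Namespace. With lowerDim=None the random
# projection is skipped and Projection_Find runs on the full-dimension Q.
from argparse import Namespace
example_params = Namespace(lowerDim=1000, seed=42)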
# Script fragment: select candidate anchors by document frequency, build the
# projected Q matrix, find anchors, write them to disk, then begin topic recovery.
R = rp.Random_Matrix(V, params.new_dim, prng)

# only accept anchors that appear in a significant number of docs
print "identifying candidate anchors"
candidate_anchors = []
for i in xrange(V):
    if len(np.nonzero(row_M[i, :])[1]) > params.anchor_thresh:
        candidate_anchors.append(i)
print len(candidate_anchors), "candidates"

# row-by-row generation of the projected Q matrix
Q = np.vstack(generate_Q_matrix(row_M, col_M, row_normalize=True,
                                projection_matrix=R.T))

_, anchors = gs.Projection_Find(Q, K, candidate_anchors)
print "anchors are:", anchors

anchor_file = file(outfile + '.anchors', 'w')
print >> anchor_file, "\t".join(["topic id", "word id", "word"])
for i, a in enumerate(anchors):
    print i, vocab[a]
    print >> anchor_file, "\t".join([str(x) for x in (i, a, vocab[a])])
anchor_file.close()

# recover topics
row_sums = np.array(row_M.sum(1)).reshape(V)

# generate Q_matrix rows for anchors
Q_A = np.vstack(generate_Q_matrix(row_M, col_M,