def make(N, Nseed, D):
    """Create a synthetic pair of word sets built from one shared latent sample.

    Both returned objects get the same words ("0", "1", ...), the same mock
    frequencies and features derived from the same latent draw; each side's
    features are passed through its own ``random_proj_shift`` call.  Y is then
    reordered with ``Y.permuteFirstWords(pi)``.

    Parameters:
        N: number of entries in the permutation (``perm.ID(N)``).
        Nseed: number of extra entries; ``N + Nseed`` words are generated.
        D: second component of the size tuple passed to ``randn``
           (presumably the latent dimensionality -- confirm against ``randn``).

    Returns:
        The tuple ``(X, Y, pi)``: the two ``words.Words`` objects and the
        permutation that was applied to Y.
    """
    total = N + Nseed

    # Shared latent sample from which both sides are derived.
    latent = randn((total, D))

    X = words.Words()
    Y = words.Words()

    # Numeric strings make it easy to run edit distance on the words.
    # (An earlier variant used 'a<i>' words plus 'b<i>' / 'c<2i>' words.)
    labels = [str(k) for k in xrange(total)]
    mock_freq = [k for k in xrange(total)]  # mock frequencies

    # X is set up first, then Y, so the two random_proj_shift calls happen
    # in the same order as before.
    for side in (X, Y):
        side.words = np.array(labels)  # a separate array for each side
        side.freq = np.array(mock_freq)
        side.features = random_proj_shift(latent)

    # Permute Y randomly.  Entries from shuffle_upto onwards stay in place;
    # with shuffle_upto == N + Nseed that tail slice is presumably empty
    # (assuming perm.ID(N) has N entries and Nseed >= 0).
    identity = perm.ID(N)
    shuffle_upto = total  # int(0.92*N)
    pi = np.append(perm.randperm(identity[:shuffle_upto]),
                   identity[shuffle_upto:])
    Y.permuteFirstWords(pi)

    return X, Y, pi
#                G[i][j] = self.K[word_i][word_j]
#
#        return np.mat(G)


if __name__ == '__main__':
    # Ad-hoc test driver: builds a random sparse dict-of-dicts with N rows,
    # each holding D // 4 entries keyed by the leading elements of a random
    # permutation of range(D), then starts a timer.  The kernel computation
    # it was written to time is currently disabled (kept below as comments).
    np.random.seed(1)  # NOTE(review): assumes perm/randn draw from numpy's global RNG -- confirm
    N = 400
    D = 800
    DD = dict()
    rangeD = range(D)
    rangeN = range(N)

    for i in rangeN:
        # Draw the permutation first, then the values, one randn call per key.
        # Floor division keeps the slice bound an integer even under true
        # division.
        S = perm.randperm(rangeD)[:D // 4]
        row = dict()
        DD[i] = row
        for j in S:
            row[j] = randn((1, 1))[0, 0]

    # Parenthesised single-argument form: same output with the Python 2
    # print statement and with the print function.
    print("finished constructing.")
    t = time.time()

    # Disabled kernel timing code:
    # K = DictDictKernel(DD)
    # #import cProfile
    # #cProfile.runct("K.compute(R, R)", globals(), locals())
    # K.compute(rangeN, rangeN)
    # G = K.materialize(rangeN, rangeN)
    # print 'elapsed', time.time() - t
    # print G