def test8():
    """Compare kernel active search with naive Shari active search on saved data.

    Loads ``X`` and ``Y`` from ``t8data.npy``, builds ``A = X.T.dot(X)``,
    runs ``AS.kernel_AS`` (when ``ker`` is set) and
    ``AS.shari_activesearch_probs_naive`` from the same starting point,
    prints the wall-clock time of each and the last entry of the ``h``
    value each one returns, then opens an IPython shell for inspection.
    """
    # Settings of the synthetic data set.  They are only referenced by the
    # commented-out generator calls below; the data itself comes from disk.
    n = 500
    r = 50
    nt = 100
    #rcross = 50
    d = 50
    hubs = 1
    verbose = False
    num_eval = 100

    #X, Y = createFakeData(n, r, nt, rcross)
    #X, Y = createFakeData2(n, r, nt, hubs)
    X, Y = np.load('t8data.npy')
    # import IPython
    # IPython.embed()

    # Start from the first index at which Y is nonzero.
    init_pt = np.nonzero(Y)[0][0]

    ker = True

    # Fraction of nonzero labels.  The 1.0 factor forces true division:
    # on Python 2 an integer-typed Y would otherwise floor this to 0.
    pi = sum(Y) * 1.0 / len(Y)

    print("Constructing the similarity matrix:")
    A = X.T.dot(X)
    t1 = time.time()

    if ker:
        print("Performing Kernel AS")
        f1, h1, s1, fs1 = AS.kernel_AS(
            X, Y, pi=pi, num_eval=num_eval, init_pt=init_pt,
            verbose=verbose, all_fs=True)
    t2 = time.time()

    #print "Performing Eigen decmop"
    #Xe, b, w, deg = eigenmap(A, d)
    #t3 = time.time()

    if ker:
        print("Performing Naive Shari AS")
    f2, h2, s2, fs2 = AS.shari_activesearch_probs_naive(
        A, labels=Y, pi=pi, w0=None, eta=None, num_eval=num_eval,
        init_pt=init_pt, verbose=verbose, all_fs=True)
    #f2,h2,s2,fs2 = AS.lreg_AS (Xe, deg, d, alpha=0.0, labels=Y, options={'num_eval':num_eval,'pi':pi,'n_conncomp':b}, verbose=verbose)
    t4 = time.time()

    # Single-argument print calls produce the same text on Python 2 and 3.
    print("Time taken for kernel: %s" % (t2 - t1))
    #print "Time taken for eigenmap + computing X.T*X:", t3-t2
    print("Time taken for Shari's method (naive): %s" % (t4 - t2))

    if ker:
        # h1 only exists when the kernel run above was executed.
        print("h_kernel: %i/%i" % (h1[-1], num_eval))
    print("h_lreg: %i/%i" % (h2[-1], num_eval))

    # Leave the results available for interactive inspection.
    import IPython
    IPython.embed()
def _drop_zero_rows_and_cols(X):
    """Return 2-D array ``X`` without rows, then columns, whose sum is zero."""
    X = X[np.flatnonzero(X.sum(axis=1)), :]
    # X is a plain ndarray, so the axis-0 sum is 1-D and np.nonzero would
    # return a 1-tuple; flatnonzero gives the column indices directly.
    X = X[:, np.flatnonzero(X.sum(axis=0))]
    return X


def test9():
    """Run naive Shari active search on a slice of the ``Xfull1.npz`` matrix.

    Takes the first 5000 columns and (after pruning) the first 2000 rows of
    the sparse matrix, drops all-zero rows and columns, labels the first
    ``nt`` columns as positives, and runs
    ``AS.shari_activesearch_probs_naive`` on ``A = X.T.dot(X)``.  An IPython
    shell is opened before and after the run.  The kernel and lreg calls are
    commented out, so their reported timings cover no real work.
    """
    verbose = True

    # Locations of the raw data files; kept for reference, not read here.
    datadir = osp.join(os.getenv('HOME'), 'Research/Data/ActiveSearch/sibi_matrices')
    tsfile = osp.join(datadir, 'timestamps.csv')
    tffile = osp.join(datadir, 'tfidf_pretranspose.txt')
    contactsfile = osp.join(datadir, 'email_person_bitarray.txt')

    #ts_data = ef.load_timestamps (tsfile)
    Xfull = load_sparse_csr('Xfull1.npz')

    n = 5000
    r = 2000

    # getting rid of features which are zero for all these elements
    X = np.array((Xfull[:, :n]).todense())
    X = _drop_zero_rows_and_cols(X)
    # import IPython
    # IPython.embed()

    # Keep the first r rows, then prune again: truncating rows can leave
    # columns (and rows) that are now entirely zero.
    X = X[:r, :]
    X = _drop_zero_rows_and_cols(X)
    # import IPython
    # IPython.embed()

    r, n = X.shape
    d = 20
    nt = 10  #int(0.1*n)
    num_eval = 15  #nt*2

    # The first nt columns are the positives.
    Y = np.array([1] * nt + [0] * (n - nt), dtype=int)
    pi = nt * 1.0 / n
    init_pt = 100

    A = X.T.dot(X)

    import IPython
    IPython.embed()

    t1 = time.time()
    print("Kernel method")
    #f1,h1,s1,fs1,dt = AS.kernel_AS (X, Y, pi=pi, num_eval=num_eval, init_pt=init_pt, verbose=verbose,all_fs=True,tinv=True)

    t2 = time.time()
    print("Eigen map")
    #Xe, b, w, deg = eigenmap(A, d)
    #np.save('eigenstuff',[Xe, b, w, deg])
    Xe, b, w, deg = np.load('eigenstuff.npy')
    # import IPython
    # IPython.embed()

    t3 = time.time()
    print("Shari method")
    #f2,h2,s2 = AS.lreg_AS (Xe, deg, d, alpha=0.0, labels=Y, options={'num_eval':num_eval,'pi':pi,'n_conncomp':b,'init_pt':init_pt}, verbose=verbose)
    t4 = time.time()

    # Not covered by any of the timers above.
    f3, h3, s3, fs3 = AS.shari_activesearch_probs_naive(
        A, labels=Y, pi=pi, w0=None, eta=None, num_eval=num_eval,
        init_pt=init_pt, verbose=verbose, all_fs=True)

    # Single-argument print calls produce the same text on Python 2 and 3.
    print("Time taken for kernel: %s" % (t2 - t1))
    #print "Time taken for inverse:", dt
    print("Time taken for eigen decomp: %s" % (t3 - t2))
    print("Time taken for lreg: %s" % (t4 - t3))

    #f1 = np.squeeze(f1)

    # Leave the results available for interactive inspection.
    import IPython
    IPython.embed()