w_estimado,yy3,xx3=lasso.lasso(Xtotal,Ytotal,vec,Xtotal,Ytotal,Xtotal,Ytotal,max_iter,lamb) w0.append(w_estimado[0,0]) w1.append(w_estimado[1,0]) w2.append(w_estimado[2,0]) w3.append(w_estimado[3,0]) w4.append(w_estimado[4,0]) print w_estimado import pylab import matplotlib.pyplot as plt plt.figure(1) ''' plt.title("Comparacao entre os 3 metodos") plt.plot(xx1,yy1,"b",xx2,yy2,"g",xx3,yy3,"r",xx1,[custo for cenas in xrange(len(xx1))],"k--") pylab.ylim([custo-1e14,custo+4e14]) plt.show() ''' plt.plot(xg,w0,"r",xg,w1,"k",xg,w2,"b",xg,w3,"g",xg,w4,"y") plt.show() ''' #pdb.set_trace() print "X=>",X.data print "Y=>",Y.data print "W=>",W.data vec=sparse.csr_matrix([1000000 for i in xrange(X.shape[1])]) vec=vec.transpose() grad.grad(X,Y,vec) '''
trainsize=int(Xtotal.shape[0]*0.8) devsize=int(Xtotal.shape[0]*0.1)+trainsize #Xtrain,Ytrain,Xtest,Ytest,Xdev,Ydev=separaXY(Xtotal,Ytotal) #train_index=xrange(trainsize) #dev_index=xrange(trainsize,devsize) #test_index=xrange(devsize,Xtotal.shape[0]) Xtotal=Xtotal.tocsc() Xtotal,indices=cria_dados.delstopword(Xtotal,indices,False) Xtotal,indices=tira_meta(Xtotal,indices) #Xtotal,indices=repara(Xtotal,indices) Xtotal=Xtotal.tocsc() Ytotal=Ytotal.tocsc() #vec=sparse.csr_matrix([0 for i in xrange(Xtrain.shape[1])]) vec=sparse.csr_matrix([0 for i in xrange(Xtotal[:trainsize,:].shape[1])]) vec=vec.transpose() W,lol1,lol2=Rgrad.grad(Xtotal[:trainsize,:],Ytotal[0:trainsize,0],vec,Xtotal[devsize:,:],Ytotal[devsize:,0],Xtotal[trainsize:devsize,:],Ytotal[trainsize:devsize,0],False,False) #W=Rgrad.grad(Xtrain,Ytrain.transpose(),vec,Xdev,Ydev.transpose(),Xtest,Ytest.transpose(),False,False) #pdb.set_trace() #print "ERRO:",Rfechado.erro(Xtest,Ytest,W) print "ERRO:", Rfechado.erro(Xtotal[trainsize:devsize,:],Ytotal[trainsize:devsize,:],W) print "__________________" print "PIORES 10" pdb.set_trace() for coiso in sorted(W.toarray())[:10]: i=0 while W[i,0] != coiso: i+=1 else: for cenas in indices: if indices[cenas]==i: print cenas , coiso, "->",i