def predict(language_dist, comment):
    """Predict the language of ``comment`` as French, Spanish or Russian.

    Loads the training matrices from ``X.txt`` and ``Y.txt``, builds a model
    with ``SVM(X, Y, nLanguages, n)``, runs ``computeFeatures`` on the data
    returned by ``loader.loaderShuffleDataTest()`` and finally calls
    ``weights.predict(comment)``.

    Args:
        language_dist: Forwarded unchanged to ``computeFeatures``
            (presumably per-language n-gram statistics -- TODO confirm).
        comment: Forwarded unchanged to ``weights.predict``.

    Returns:
        ``"French"`` when the prediction equals 0, ``"Spanish"`` when it
        equals 1, and ``"Russian"`` for any other value.
    """
    nLanguages = 3
    n = 3
    # NOTE(review): iteration order of a set of strings is not guaranteed to
    # be stable between interpreter runs, so the position of each language
    # in this list may vary. Kept as-is because other code may build the
    # same list the same way -- confirm before switching to a fixed order.
    LANGUAGES = list(set(['fr', 'es', 'ru']))

    print("implementing SVM ranking")
    X = np.loadtxt('X.txt')
    Y = np.loadtxt('Y.txt')
    weights = SVM(X, Y, nLanguages, n)

    # The return value of computeFeatures is not used here; the call is kept
    # in case it has side effects that later steps rely on -- TODO confirm.
    developmentData = loader.loaderShuffleDataTest()
    computeFeatures(developmentData, language_dist, LANGUAGES, 1)

    # NOTE(review): the model is queried with the raw ``comment`` rather than
    # a feature vector derived from it -- verify that this is what
    # ``weights.predict`` expects.
    Ypredict = weights.predict(comment)

    # Label mapping as written in the original code: 0 -> French,
    # 1 -> Spanish, anything else -> Russian.
    if Ypredict == 0:
        return "French"
    if Ypredict == 1:
        return "Spanish"
    return "Russian"
# for i in range(0,n): # j=0 # for language in LANGUAGES: # NumbersWords[i,j]=sum(language_dist[language]['words'][i+1].values()) # j=j+1 # #print NumbersWords # #training features # print ("Starting loaderShuffleData") # trainData=loader.loaderShuffleData() # print ("computing X and Y matrices") # computeFeatures(trainData,language_dist,LANGUAGES,NumbersWords,0) print("implementing SVM ranking") X=np.loadtxt('Xtrain.txt') Y=np.loadtxt('Ytrain.txt') weights=SVM(X,Y,nLanguages,n) developmentData=loader.loaderShuffleDataTest() computeFeatures(developmentData,language_dist,LANGUAGES,1) X=np.loadtxt('Xtest.txt') Y=np.loadtxt('Ytest.txt') nbPrediction=len(Y) Ypredict=weights.predict(X) precision=0 nCorrect=0 for i in range(len(Y)): if Ypredict[i]==Y[i]: nCorrect=nCorrect+1 precision=nCorrect/len(Y) print("Precision is equal to "+str(precision)+"on "+str(nbPrediction)+" comments") # LANGUAGES = list(set(['english', 'french', 'german'])) # for language in LANGUAGES: