prob = prob + 0.0 perplex = float(-1)*(float(1)/float(v))*prob perplex = 10**perplex print perplex return perplex myset = Set([1,2,3,4,5]); temp = myset fp = open('results_unigram','w+') Pos_Dict= dict() Neg_Dict= dict() for x in myset: temp.remove(x) Pos_Dict = get_unigram('pos','/home/avj/Documents/NLP/NLP_BinaryClassifier/dataset',temp) # Calculate pos_perplexity print len(Pos_Dict) Neg_Dict = get_unigram('neg','/home/avj/Documents/NLP/NLP_BinaryClassifier/dataset',temp) print len(Neg_Dict) # Calculate neg_perplexity fpath = '/home/avj/Documents/NLP/NLP_BinaryClassifier/dataset/'+str(x) test_file_p = os.listdir(fpath +'/pos') test_file_n = os.listdir(fpath+'/neg') fp.writelines("Test folder:"+str(x)+"\n") #test positive folder under test folder for test in test_file_p: pos_perp = perplexity(Pos_Dict, fpath+"/pos/"+test) neg_perp = perplexity(Neg_Dict, fpath+"/pos/"+test) if pos_perp < neg_perp:
for l in label: files = os.listdir(path+'/'+str(fold)+'/'+l) for f in files: build(master_dict, path+'/'+str(fold)+'/'+l+'/'+f, l, fh) fh.close() myset = Set([1,2,3,4,5]); temp = myset #fp = open('results_unigram','w+') master_dict= dict() Pos_Dict = dict() Neg_Dict= dict() path = 'dataset' for x in myset: temp.remove(x) Pos_Dict = get_unigram('pos', path, temp) Neg_Dict = get_unigram('neg', path, temp) master_dict.update(Pos_Dict) master_dict.update(Neg_Dict) for i in master_dict.iterkeys(): master_dict[i] = 0 #print "length"len(master_dict) for i in Pos_Dict.iterkeys(): master_dict[i] += Pos_Dict[i] for i in Neg_Dict.iterkeys(): master_dict[i] += Neg_Dict[i] #print sorted_x[0] #a = str(sorted_x[0]).split(",")[0][2:]
prob = prob +log( 0.00004, 2) #perplex = float(-1)*(float(1)/float(v))*prob #perplex = 10**perplex #print perplex return prob myset = Set([1,2,3,4,5]); temp = myset fp = open('results_unigram','w+') Pos_Dict= dict() Neg_Dict= dict() for x in myset: temp.remove(x) Pos_Dict = get_unigram('pos','dataset',temp) # Calculate pos_perplexity print len(Pos_Dict) Neg_Dict = get_unigram('neg','dataset',temp) print len(Neg_Dict) # Calculate neg_perplexity fpath = '/home/avj/Documents/NLP/NLP_BinaryClassifier/dataset/'+str(x) test_file_p = os.listdir(fpath +'/pos') test_file_n = os.listdir(fpath+'/neg') fp.writelines("Test folder:"+str(x)+"\n") #test positive folder under test folder y_true = list() y_pred = list() Npos =0 Nneg = 0