# NOTE(review): the line below looks like many statements collapsed onto one
# physical line (newlines and indentation lost). As written, everything after
# the first `#` is a comment, and which statements belong to the `for` body
# cannot be recovered from this line alone.
#
# Visible content, in order:
#   * Tail of a function whose header is not visible here: adds
#     log(0.00004, 2) to `prob` and returns `prob`; the perplexity
#     computation is commented out.
#   * Script setup: myset = Set([1, 2, 3, 4, 5]), `temp = myset`, the file
#     "results_unigram" opened in "w+" mode as `fp`, empty Pos_Dict/Neg_Dict.
#   * After `for x in myset:` -- x is removed from `temp`; get_bigram is
#     called for "pos" and "neg" with ("dataset", temp) and the length of each
#     result is printed; dataset/<x>/pos and dataset/<x>/neg are listed with
#     os.listdir; a "Test folder:<x>" header is written to `fp`; y_true,
#     y_pred, Npos and Nneg are initialised. The fragment ends there.
#
# NOTE(review): `temp = myset` binds a second name to the SAME set object, so
# `temp.remove(x)` mutates the set being iterated. Unless x is re-added before
# the next iteration (not visible here) this can raise RuntimeError and the
# held-out folds accumulate -- confirm whether a copy was intended.
# NOTE(review): results go to "results_unigram" but the dictionaries come from
# get_bigram -- confirm which model this script is meant to evaluate.
# NOTE(review): `fp` is opened without `with`; no close() is visible here.
prob = prob + log(0.00004, 2) # perplex = float(-1)*(float(1)/float(v))*prob # perplex = 10**perplex # print perplex return prob myset = Set([1, 2, 3, 4, 5]) temp = myset fp = open("results_unigram", "w+") Pos_Dict = dict() Neg_Dict = dict() for x in myset: temp.remove(x) Pos_Dict = get_bigram("pos", "dataset", temp) # Calculate pos_perplexity print len(Pos_Dict) Neg_Dict = get_bigram("neg", "dataset", temp) print len(Neg_Dict) # Calculate neg_perplexity fpath = "dataset/" + str(x) test_file_p = os.listdir(fpath + "/pos") test_file_n = os.listdir(fpath + "/neg") fp.writelines("Test folder:" + str(x) + "\n") # test positive folder under test folder y_true = list() y_pred = list() Npos = 0 Nneg = 0
# NOTE(review): the line below looks like many statements collapsed onto one
# physical line (newlines and indentation lost). It contains a single
# triple-quote delimiter right after `fh.close()`; its matching delimiter is
# not visible here, so it is unclear whether the code after it is live or part
# of a string used to disable a block -- confirm against the full file.
#
# Visible content after the delimiter, in order:
#   * Setup: myset = Set([1,2,3,4,5]), `temp = myset` (same object, no copy),
#     empty master_dict / Pos_Dict / Neg_Dict, path = 'dataset'. Opening
#     'results_unigram' is commented out.
#   * After `for x in myset:` -- get_bigram is called for 'pos' and 'neg' with
#     (path, temp); both results are merged into master_dict with update()
#     (on a key present in both, the Neg_Dict value replaces the Pos_Dict
#     value); `master` is set to sorted(master_dict), i.e. a sorted list of
#     the keys. The print "1".."4" statements are progress markers using the
#     Python 2 print-statement syntax.
#
# NOTE(review): `temp.remove(x)` is commented out and no visible statement
# uses x, so every iteration passes the same arguments to get_bigram --
# presumably the loop is redundant here; verify.
# NOTE(review): `master = ()` is overwritten by the very next assignment, and
# the `master = (); master = sorted(master_dict)` pair appears twice.
fh.close() ''' myset = Set([1,2,3,4,5]); temp = myset #fp = open('results_unigram','w+') master_dict= dict() Pos_Dict = dict() Neg_Dict= dict() path = 'dataset' for x in myset: #temp.remove(x) Pos_Dict = get_bigram('pos', path, temp) Neg_Dict = get_bigram('neg', path, temp) print "1" master_dict.update(Pos_Dict) master_dict.update(Neg_Dict) print "2" master = () master = sorted(master_dict) print "3" #print master_dict master = () master = sorted(master_dict) print "4" #print master
# NOTE(review): the line below looks like many statements collapsed onto one
# physical line (newlines and indentation lost); it is not valid Python as
# written, and which statements belong to the `for` body cannot be recovered
# from this line alone.
#
# Visible content, in order:
#   * Tail of a function whose header is not visible here: computes
#     perplex = -(1/v) * prob, then perplex = 10000 * (10 ** perplex), and
#     returns it. Presumably `v` is a token count and `prob` a base-10 log
#     probability -- TODO confirm; the 10000 scale factor is unexplained.
#   * Script setup: myset = Set([1,2,3,4,5]), `temp = myset`, the file
#     'results_bigram' opened in 'w+' mode as `fp`, four empty dicts
#     (unigram and bigram, pos and neg).
#   * After `for x in myset:` -- x is removed from `temp`; get_unigram and
#     get_bigram are called for 'pos' and 'neg' against the dataset directory
#     and the length of each result is printed; <dataset>/<x>/pos and
#     <dataset>/<x>/neg are listed with os.listdir; a "Test folder:<x>" header
#     is written to `fp`; Pos_Dict_bi is replaced by
#     convert(Pos_Dict_uni, Pos_Dict_bi). The fragment ends there.
#
# NOTE(review): `temp = myset` binds a second name to the SAME set object, so
# `temp.remove(x)` mutates the set being iterated. Unless x is re-added before
# the next iteration (not visible here) this can raise RuntimeError and the
# held-out folds accumulate -- confirm whether a copy was intended.
# NOTE(review): the absolute dataset path is hard-coded and repeated five
# times; the script only runs where that directory exists.
# NOTE(review): `fp` is opened without `with`; no close() is visible here.
perplex = float(-1)*(float(1)/float(v))*prob perplex = 10000*(10**perplex) #print perplex return perplex myset = Set([1,2,3,4,5]); temp = myset fp = open('results_bigram','w+') Pos_Dict_uni= dict() Pos_Dict_bi= dict() Neg_Dict_uni= dict() Neg_Dict_bi= dict() for x in myset: temp.remove(x) Pos_Dict_uni = get_unigram('pos','/home/avj/Documents/NLP/NLP_BinaryClassifier/dataset',temp) Pos_Dict_bi = get_bigram('pos','/home/avj/Documents/NLP/NLP_BinaryClassifier/dataset',temp) # Calculate pos_perplexity print len(Pos_Dict_bi) print len(Pos_Dict_uni) Neg_Dict_uni = get_unigram('neg','/home/avj/Documents/NLP/NLP_BinaryClassifier/dataset',temp) Neg_Dict_bi = get_bigram('neg','/home/avj/Documents/NLP/NLP_BinaryClassifier/dataset',temp) print len(Neg_Dict_bi) print len(Neg_Dict_uni) # Calculate neg_perplexity fpath = '/home/avj/Documents/NLP/NLP_BinaryClassifier/dataset/'+str(x) test_file_p = os.listdir(fpath +'/pos') test_file_n = os.listdir(fpath+'/neg') fp.writelines("Test folder:"+str(x)+"\n") #test positive folder under test folder Pos_Dict_bi = convert(Pos_Dict_uni, Pos_Dict_bi)