def main():
    """Train a BayesClassifier on ``Bupa.txt`` and report training-set accuracy.

    Every line of the data file is a comma-separated record: the last field
    is the class label and the preceding fields are the features (kept as
    strings). The classifier is trained on all records, then asked to
    classify those same records; each prediction is printed, followed by the
    share of predictions that match the original label.
    """
    features = []  # feature vectors, one list of string fields per record
    labels = []  # class label of each record
    # The context manager closes the handle even if reading fails part-way.
    with open("Bupa.txt", 'r') as data_file:
        for line in data_file:  # read the data file line by line
            fields = line.strip().split(',')
            labels.append(fields[-1])
            features.append(fields[:-1])

    Bay = BayesClassifier()
    Bay.train(features, labels)

    correct = 0
    for sample, expected in zip(features, labels):
        label = Bay.classify(sample)
        # str() keeps the concatenation safe for non-string labels, matching
        # the str()-based comparison just below.
        print("Original:" + str(expected) + "==>" + "Classified:" + str(label))
        if str(label) == str(expected):
            correct += 1
    print("Accuracy:", correct / len(features))  # accuracy
def main():
    """Train a BayesClassifier on ``./Weather.txt`` and dump the parsed data set.

    Every line of the data file is a comma-separated record: the last field
    is the class label and the preceding fields are the features (kept as
    strings). The classifier is trained on all records and run over each of
    them; finally the parsed feature vectors and labels are printed.
    """
    features = []  # feature vectors, one list of string fields per record
    labels = []  # class label of each record
    # The context manager closes the handle even if reading fails part-way.
    with open("./Weather.txt", 'r') as data_file:
        for line in data_file:  # read the data file line by line
            fields = line.strip().split(',')
            labels.append(fields[-1])
            features.append(fields[:-1])

    Bay = BayesClassifier()
    Bay.train(features, labels)

    # Every record is still classified, but the tally is not reported:
    # the accuracy print-out is currently disabled.
    correct = 0
    for sample, expected in zip(features, labels):
        label = Bay.classify(sample)
        if str(label) == str(expected):
            correct += 1

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(features)
    print(labels)
def create():
    """Train a BayesClassifier on per-category keywords and classify one title.

    Reads every row of the ``le_goods_cate_keywords`` table, groups the
    ``keyword`` values by ``cate_id`` for categories 1 through 9, trains the
    classifier with one keyword list per category (labelled '1'..'9', empty
    categories included), and prints the classification of a hard-coded
    sample title. Rows whose ``cate_id`` is outside 1..9 are ignored.
    """
    # NOTE(review): database credentials are hard-coded; they should be read
    # from configuration or the environment instead of living in source.
    conn = MySQLdb.connect(user='******', db='test', passwd='LEsc2008', host='localhost')
    try:
        cursor = conn.cursor(cursorclass=MySQLdb.cursors.DictCursor)
        try:
            cursor.execute('SET NAMES utf8')
            cursor.execute('SELECT * FROM le_goods_cate_keywords')
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Release the connection even when a query fails.
        conn.close()

    category_ids = list(range(1, 10))
    keywords_by_category = {cate_id: [] for cate_id in category_ids}
    for row in rows:
        bucket = keywords_by_category.get(row['cate_id'])
        if bucket is not None:
            bucket.append(row['keyword'])

    # One training sample per category, in ascending category order.
    labels = [str(cate_id) for cate_id in category_ids]  # class labels
    features = [keywords_by_category[cate_id] for cate_id in category_ids]  # feature sets

    Bay = BayesClassifier()
    Bay.train(features, labels)
    print(Bay.classify("JUMBO Seafood NSRCC: $65 for $100 Cash Voucher at Changi Coast Walk. More Options Available "))
def testBayes():
    """Score each test row with a per-merchant BayesClassifier and save the result.

    For every row of ``./test_data/data_revised.csv`` the merchant id is
    taken from column 1 and the feature pair from columns 3 and 4. If a
    training file ``./train_data/merchant_train_data/<id>_noNull.csv`` exists
    and holds more than one sample, a fresh classifier is trained on it, the
    test row is classified, the resulting probability is stored on the row's
    ``Table4`` record and the record is appended to ``./result/table4_4`` via
    ``savecsv``. Rows whose merchant has no training file are counted and the
    running count is printed.
    """
    merchant_ids = []  # merchant id of each test row
    test_features = []  # feature pair of each test row
    result_rows = []  # Table4 record of each test row

    # Read the test data set.
    with open('./test_data/data_revised.csv') as test_file:
        for line in test_file:
            fields = line.strip().split(',')
            merchant_ids.append(int(fields[1]))
            test_features.append([float(fields[3]), int(fields[4])])
            result_rows.append(Table4(fields[0], fields[2], fields[5], '0'))

    # Read the full training data set.
    # NOTE(review): nothing below consumes these two lists; the load is kept
    # so a missing or malformed file still surfaces. Drop it if no global
    # fallback model is planned.
    features, labels = _load_labeled_rows('./train_data/Date_all.csv')

    num_not_in = 0  # test rows whose merchant has no training file
    for merchant_id, sample, result in zip(merchant_ids, test_features, result_rows):
        merchant_file = ('./train_data/merchant_train_data/' + str(merchant_id)
                         + '_noNull' + '.csv')
        # NOTE(review): assumes the miss counter pairs with the
        # file-existence check (not with the sample-count check) - confirm.
        if not os.path.exists(merchant_file):
            num_not_in = num_not_in + 1
            print(num_not_in)
            continue

        features_key, labels_key = _load_labeled_rows(merchant_file)
        print(len(features_key))
        print(len(labels_key))
        if len(features_key) > 1:
            Bay = BayesClassifier()
            Bay.train(features_key, labels_key)
            label, maxProbability = Bay.classify(sample)
            # str(label): the training labels are ints, so guard the
            # concatenation against a non-string label.
            print("maxProbability:" + str(maxProbability) + "==>" +
                  "Classified:" + str(label))
            result.giveProbability(str(maxProbability))
            items = [
                result.User_id, result.Coupon_id, result.Date_received,
                result.Probability
            ]
            savecsv('./result/table4_4', items)


def _load_labeled_rows(path):
    """Parse a training CSV into (features, labels) from columns 3, 4 and 7."""
    features = []
    labels = []
    # The context manager closes the handle even if a row fails to parse.
    with open(path) as csv_file:
        for line in csv_file:
            fields = line.strip().split(',')
            features.append([float(fields[3]), int(fields[4])])
            labels.append(int(fields[7]))
    return features, labels