示例#1
0
def main():
    """Train a Bayes classifier on ``Bupa.txt`` and report training-set accuracy.

    Every non-empty line of the file is split on commas; the last field is
    taken as the class label and the preceding fields as the feature vector.
    The classifier is trained on all records and then asked to classify the
    same records again. Each prediction is printed, followed by the fraction
    of predictions that match the original label.
    """
    features = []  # feature vectors of the data set (lists of strings)
    labels = []    # class labels of the data set, aligned with ``features``

    # The context manager releases the file handle even if parsing fails.
    with open("Bupa.txt", 'r') as data_file:
        for line in data_file:  # read the data file line by line
            line = line.strip()
            if not line:
                # A blank line would otherwise be recorded as a sample with
                # an empty label and no features.
                continue
            fields = line.split(',')
            labels.append(fields[-1])
            features.append(fields[:-1])

    if not features:
        # Nothing to train on; also avoids a ZeroDivisionError below.
        print("Accuracy: n/a (no samples)")
        return

    Bay = BayesClassifier()
    Bay.train(features, labels)

    correct = 0
    for sample, expected in zip(features, labels):
        label = Bay.classify(sample)
        # str() keeps the message working when classify() returns a
        # non-string label (the comparison below already allows for that).
        print("Original:" + str(expected) + "==>" + "Classified:" + str(label))
        if str(label) == str(expected):
            correct += 1
    print("Accuracy:", correct / len(features))  # accuracy on the training set
示例#2
0
def main():
    """Load ``./Weather.txt``, train a Bayes classifier and print the data set.

    Every non-empty line of the file is split on commas; the last field is
    taken as the class label and the preceding fields as the feature vector.
    The classifier is trained on all records and each record is classified
    again. Finally the parsed feature vectors and labels are printed.
    """
    features = []  # feature vectors of the data set (lists of strings)
    labels = []    # class labels of the data set, aligned with ``features``

    # The context manager releases the file handle even if parsing fails.
    with open("./Weather.txt", 'r') as data_file:
        for line in data_file:  # read the data file line by line
            line = line.strip()
            if not line:
                # A blank line would otherwise be recorded as a sample with
                # an empty label and no features.
                continue
            fields = line.split(',')
            labels.append(fields[-1])
            features.append(fields[:-1])

    Bay = BayesClassifier()
    Bay.train(features, labels)

    # Accuracy reporting is switched off in this example; the tally of
    # matching predictions is still computed but not printed.
    correct = 0
    for sample, expected in zip(features, labels):
        label = Bay.classify(sample)
        if str(label) == str(expected):
            correct += 1

    # Single-argument print(...) produces the same output under Python 2's
    # print statement and Python 3's print function.
    print(features)
    print(labels)
示例#3
0
def create():
    """Train a Bayes classifier on category keywords from MySQL and classify a sample.

    Reads every row of the ``le_goods_cate_keywords`` table, groups the
    ``keyword`` values by ``cate_id`` for categories 1 through 9, trains a
    ``BayesClassifier`` with one keyword list per category (labelled
    ``'1'`` .. ``'9'``) and prints the classification of a hard-coded
    sample string. Rows whose ``cate_id`` is outside 1..9 are ignored.
    """
    # NOTE(review): the credentials are hard-coded in source; consider
    # loading them from configuration or the environment instead.
    conn = MySQLdb.connect(user='******', db='test', passwd='LEsc2008', host='localhost')
    try:
        cursor = conn.cursor(cursorclass=MySQLdb.cursors.DictCursor)
        try:
            cursor.execute('SET NAMES utf8')
            cursor.execute('SELECT * FROM le_goods_cate_keywords')
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Release the connection as soon as the rows are in memory, even if
        # one of the queries fails.
        conn.close()

    category_ids = list(range(1, 10))
    # One keyword bucket per category id; replaces nine parallel lists.
    keywords_by_category = dict((cate_id, []) for cate_id in category_ids)
    for row in rows:
        bucket = keywords_by_category.get(row['cate_id'])
        if bucket is not None:
            bucket.append(row['keyword'])

    labels = [str(cate_id) for cate_id in category_ids]                  # class labels of the data set
    features = [keywords_by_category[cate_id] for cate_id in category_ids]  # feature sets of the data set

    Bay = BayesClassifier()
    Bay.train(features, labels)

    print(Bay.classify("JUMBO Seafood NSRCC: $65 for $100 Cash Voucher at Changi Coast Walk. More Options Available "))
示例#4
0
def testBayes():
    """Score each test record with a per-merchant Bayes classifier and save the result.

    Steps:

    1. Read ``./test_data/data_revised.csv``. For every record, column 1 is
       kept as the merchant id, columns 3 and 4 as the feature pair, and a
       ``Table4`` result row is built from columns 0, 2 and 5.
    2. Read ``./train_data/Date_all.csv`` (columns 3 and 4 as features,
       column 7 as label). NOTE(review): the collections filled from this
       file are not consumed later in this function.
    3. For every test record, look for
       ``./train_data/merchant_train_data/<merchant id>_noNull.csv``. When
       the file exists and holds more than one sample, train a fresh
       ``BayesClassifier`` on it, classify the test record, store the
       returned probability on the result row and append the row to
       ``./result/table4_4`` through ``savecsv``.
    4. Print how many test records had no per-merchant training file.
    """
    features = []  # feature set of the full training data
    labels = []    # class labels of the full training data

    features_t = []      # feature pairs of the test records
    tables_result = []   # one Table4 row per test record
    Merchant_ids_t = []  # merchant id of each test record

    # Lookup dictionaries for test / train rows, keyed by column 2 of the
    # CSV. NOTE(review): the names say "merchant id" but column 2 is also
    # what is passed to Table4 as its second argument -- confirm the key.
    Merchant_ids_test = {}
    Merchant_ids_train = {}
    testData = []
    trainData = []

    # Read the test data set.
    with open('./test_data/data_revised.csv') as test_data:
        for line in test_data:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines instead of raising IndexError
            lineArr = line.split(',')
            Merchant_ids_t.append(int(lineArr[1]))
            features_t.append([float(lineArr[3]), int(lineArr[4])])
            tables_result.append(Table4(lineArr[0], lineArr[2], lineArr[5], '0'))
            # list.append() returns None, so the row must be built first and
            # then stored in both the list and the dictionary.
            test_row = [float(lineArr[3]), int(lineArr[4])]
            testData.append(test_row)
            Merchant_ids_test[lineArr[2]] = test_row

    # Read the training data set.
    with open('./train_data/Date_all.csv') as all_data:
        for line in all_data:
            line = line.strip()
            if not line:
                continue
            lineArr = line.split(',')
            features.append([float(lineArr[3]), int(lineArr[4])])
            labels.append(int(lineArr[7]))
            train_row = [float(lineArr[3]), int(lineArr[4]), int(lineArr[7])]
            trainData.append(train_row)
            Merchant_ids_train[lineArr[2]] = train_row

    num_not_in = 0  # test records without a per-merchant training file
    for i, key in enumerate(Merchant_ids_t):
        key_dir_name = './train_data/merchant_train_data/' + str(
            key) + '_noNull' + '.csv'
        if not os.path.exists(key_dir_name):
            num_not_in = num_not_in + 1
            continue

        features_key = []  # feature set for this merchant
        labels_key = []    # class labels for this merchant
        # Closing the file on every iteration keeps the loop from leaking
        # one handle per test record.
        with open(key_dir_name) as key_data:
            for line in key_data:
                line = line.strip()
                if not line:
                    continue
                lineArr = line.split(',')
                features_key.append([float(lineArr[3]), int(lineArr[4])])
                labels_key.append(int(lineArr[7]))
        print(len(features_key))
        print(len(labels_key))

        if len(features_key) <= 1:
            # Need more than one sample to train a classifier.
            continue

        Bay = BayesClassifier()
        Bay.train(features_key, labels_key)

        label, maxProbability = Bay.classify(features_t[i])
        # The training labels are ints, so the label is converted before it
        # is concatenated into the message.
        print("maxProbability:" + str(maxProbability) + "==>" +
              "Classified:" + str(label))
        tables_result[i].giveProbability(str(maxProbability))
        items = [
            tables_result[i].User_id, tables_result[i].Coupon_id,
            tables_result[i].Date_received,
            tables_result[i].Probability
        ]
        dir_name = './result/table4_4'
        savecsv(dir_name, items)
    print(num_not_in)