示例#1
0
def naiveBayesMC():
    binSize = 10;
    maxPrice = 100;
    dataSplit = 0.70;

    print "Loading items... "
    allItems                = getItems(db,filteritems=True,complete=True,sold=True,genre=ROCK);   
    [trainItems,testItems]  = splitItemSet(allItems,dataSplit);

    bins = generateBinArray(binSize,maxPrice);
    [actualFinalPrice,actualFinalPriceBinned] = getFinalPrices(testItems,bins);

    ##Only have to make these onece
    orderedWordList = generateOrderedWordList(allItems,lengthcutoff=5,frequencycutoff=1);  
    [testMatrix,testCategory]   = generateMatrixData(orderedWordList,generateItemTitleList(testItems,orderedWordList));

    predictedFinalPrices = [-1]*len(testItems);
    for priceCutOff in bins:
        print "Price cut off: ", priceCutOff

        [phi_k_unsold,phi_k_sold,p_y0,p_y1] = trainOnData(trainItems,orderedWordList,priceCutOff);
        [testingSetPredictions,prob_sell,prob_wontSell]  = makePredictions(testMatrix,phi_k_sold,phi_k_unsold,p_y0,p_y1);    #predicted category for testItems [0,1]
        predictedFinalPrices = updatePredictedFinalPrice(testItems,predictedFinalPrices,testingSetPredictions,priceCutOff,binSize,bins);

    for i in range(len(predictedFinalPrices)):
        if predictedFinalPrices[i] == -1: predictedFinalPrices[i] = bins[-1];
            
    for i in range(len(predictedFinalPrices)):
        print i,testItems[i][0],testItems[i][START_PRICE],testItems[i][END_PRICE],getBinOf(bins,float(testItems[i][START_PRICE])), actualFinalPriceBinned[i],predictedFinalPrices[i],"\t\t",testItems[i][TITLE]
            
    print "Classification error on testing set is: ", classificationError(predictedFinalPrices,actualFinalPriceBinned);
def runClassificationErrorTest():
    """Sanity-check m.classificationError against a few hand-computed cases.

    Every case is evaluated even after a mismatch has been seen; if any of
    them disagrees with its expected value a single failure message is
    printed.  Nothing is returned and nothing is printed on success.
    """
    # (first argument, second argument, expected error)
    cases = (
        ([0], [0], 0),
        ([0], [1], 1),
        ([0], [-1], 1),
        ([0, 0], [0, 0], 0),
        ([0, 0], [0, 1], 0.5),
        ([0, 0], [1, 1], 1),
    )

    failed = False
    for first, second, expected in cases:
        if m.classificationError(first, second) != expected:
            failed = True

    if failed:
        # Single-argument form prints identically under Python 2 and 3.
        print("Classification Error Function has failed!")
示例#3
0
    print "Items in testing set: ",len(testItems);

    orderedWordList = generateOrderedWordList(allItems,frequencycutoff = 4);

    print "Training on data set ..."
    [phi_k_unsold,phi_k_sold,p_y0,p_y1] = trainOnData(trainItems,orderedWordList,0)

    printTopWords(phi_k_sold,phi_k_unsold,orderedWordList)

    print "Preparing test data ... "
    itemTitleList = generateItemTitleList(testItems,orderedWordList);
    [testMatrix,testCategory] = generateMatrixData(orderedWordList,itemTitleList);

    print "Making predictions ... "
    [testingSetPredictions,prob_sell,prob_wontSell] = makePredictions(testMatrix,phi_k_sold,phi_k_unsold,p_y0,p_y1,uniformPrior=False);
    print "Classification error on testing set is: ", classificationError(testingSetPredictions,testCategory);

def printTopWords(phi_k_sold,phi_k_unsold,orderedWordList):
    """Print the 30 highest and 30 lowest rated words.

    Word ``i`` is rated by ``log(phi_k_sold[i] / phi_k_unsold[i])``, so a
    high rating means the word's "sold" parameter is large relative to its
    "unsold" parameter.

    Args:
        phi_k_sold: indexable sequence of per-word values for the sold class
            (presumably smoothed probabilities, i.e. non-zero -- confirm
            against trainOnData).
        phi_k_unsold: matching sequence for the unsold class; must be at
            least as long as ``phi_k_sold``.
        orderedWordList: words, indexed the same way as the two sequences.

    Returns:
        None.  Output goes to stdout: a blank line, the highest rated words
        (best first), then the lowest rated words (worst first).
    """
    topCount = 30

    rating = [np.log(phi_k_sold[i] / phi_k_unsold[i])
              for i in range(len(phi_k_sold))]

    # Sort the word indices once, ascending by rating; the descending order
    # is just the reverse of that.
    ascending = sorted(range(len(rating)), key=rating.__getitem__)
    lowestRated = [orderedWordList[i] for i in ascending]
    highestRated = lowestRated[::-1]

    # The labels used to be attached to the wrong lists: the ascending
    # (lowest-first) list was printed as "Highest rated words".
    # Single-argument print(...) behaves the same under Python 2 and 3; the
    # double space reproduces the Python 2 print-statement separator.
    print("")
    print("Highest rated words:  %s" % (highestRated[:topCount],))
    print("Lowest rated:  %s" % (lowestRated[:topCount],))

##