Exemplo n.º 1
0
def naiveBayesMC():
    binSize = 10;
    maxPrice = 100;
    dataSplit = 0.70;

    print "Loading items... "
    allItems                = getItems(db,filteritems=True,complete=True,sold=True,genre=ROCK);   
    [trainItems,testItems]  = splitItemSet(allItems,dataSplit);

    bins = generateBinArray(binSize,maxPrice);
    [actualFinalPrice,actualFinalPriceBinned] = getFinalPrices(testItems,bins);

    ##Only have to make these onece
    orderedWordList = generateOrderedWordList(allItems,lengthcutoff=5,frequencycutoff=1);  
    [testMatrix,testCategory]   = generateMatrixData(orderedWordList,generateItemTitleList(testItems,orderedWordList));

    predictedFinalPrices = [-1]*len(testItems);
    for priceCutOff in bins:
        print "Price cut off: ", priceCutOff

        [phi_k_unsold,phi_k_sold,p_y0,p_y1] = trainOnData(trainItems,orderedWordList,priceCutOff);
        [testingSetPredictions,prob_sell,prob_wontSell]  = makePredictions(testMatrix,phi_k_sold,phi_k_unsold,p_y0,p_y1);    #predicted category for testItems [0,1]
        predictedFinalPrices = updatePredictedFinalPrice(testItems,predictedFinalPrices,testingSetPredictions,priceCutOff,binSize,bins);

    for i in range(len(predictedFinalPrices)):
        if predictedFinalPrices[i] == -1: predictedFinalPrices[i] = bins[-1];
            
    for i in range(len(predictedFinalPrices)):
        print i,testItems[i][0],testItems[i][START_PRICE],testItems[i][END_PRICE],getBinOf(bins,float(testItems[i][START_PRICE])), actualFinalPriceBinned[i],predictedFinalPrices[i],"\t\t",testItems[i][TITLE]
            
    print "Classification error on testing set is: ", classificationError(predictedFinalPrices,actualFinalPriceBinned);
Exemplo n.º 2
0
def updatePredictedFinalPrice(testItems,predictedFinalPrice,testingSetPredictions,priceCutOff,binSize,bins):
    """Assign a predicted price to test items that do not have one yet.

    An item is updated only when its entry in testingSetPredictions is 0
    and its entry in predictedFinalPrice is still the sentinel -1.  For
    such an item, with lowerEdge = priceCutOff - binSize:

      * if lowerEdge is greater than the item's start price, the
        prediction becomes lowerEdge;
      * if lowerEdge is less than or equal to the start price, the
        prediction becomes getBinOf(bins, start price), replaced by 0 when
        that value is negative.

    NOTE(review): a prediction of 0 presumably means "does not sell at
    this cut-off" -- confirm against makePredictions.

    predictedFinalPrice is modified in place and also returned.
    """
    lowerEdge = priceCutOff - binSize
    for idx, item in enumerate(testItems):
        # Skip items predicted non-zero or already assigned a price.
        if testingSetPredictions[idx] != 0 or predictedFinalPrice[idx] != -1:
            continue

        startPrice = float(item[START_PRICE])
        if lowerEdge > startPrice:
            predictedFinalPrice[idx] = lowerEdge
        elif lowerEdge <= startPrice:
            # Explicit comparison (not a bare else) so that an unordered
            # start price such as NaN leaves the entry untouched.
            startBin = getBinOf(bins, startPrice)
            predictedFinalPrice[idx] = 0 if startBin < 0 else startBin
    return predictedFinalPrice