def main():
    """Sweep ID3 trees of depth 1-6 over three split criteria on the 'Cars' dataset.

    Prints training and test accuracy for every (criterion, depth) pair.
    Relies on module-level `training`, `testing`, `labels`, `label_attr`
    and the `id3` module's `id3`, `predict`, `accuracy` helpers.
    """
    trainset, trainRaw = makeData(training, labels)
    testset, testRaw = makeData(testing, labels)
    # The gold label is the last field of each raw record.
    trainLabels = [row[-1] for row in trainRaw]
    testLabels = [row[-1] for row in testRaw]

    print("Running the decision tree algorithm on the 'Cars' dataset.")
    for criterion in ('gini', 'entropy', 'ME'):
        for depth in range(1, 7):
            tree = id3.id3(trainset, labels, label_attr, labels[-1],
                           depth, criterion, None)
            train_guesses = [id3.predict(tree, row, labels) for row in trainset]
            test_guesses = [id3.predict(tree, row, labels) for row in testset]
            train_acc = id3.accuracy(train_guesses, trainLabels)
            test_acc = id3.accuracy(test_guesses, testLabels)
            print("Decision tree of depth", depth, "using", criterion,
                  "has a test accuracy of", test_acc,
                  'and a training accuracy of', train_acc)
def boost(data, labels, attr_list, target, iterations, answers):
    """AdaBoost over depth-2 ID3 stumps.

    Each example in `data` carries a mutable 'weight' key, updated in place
    every round.  `answers` is accepted for interface compatibility but is
    not used by the algorithm itself.

    Returns a list of {'tree': stump, 'alpha': vote_weight} dicts, one per
    boosting round, consumable by boostGuess().
    """
    treelist = []
    for _ in range(iterations):
        # Total mass of the (unnormalized) weight distribution this round;
        # dividing by it below is equivalent to renormalizing the weights.
        normalize = sum(item['weight'] for item in data)
        currentTree = id3(data, labels, attr_list, target, 2, 'entropy', None)
        # Weighted training error of this round's stump.
        trainError = 0
        for item in data:
            if predict(currentTree, item, labels) != item[labels[-1]]:
                trainError += item['weight'] / normalize
        # Clamp into (0, 1) so log() stays defined when the stump is
        # perfect (error 0) or perfectly wrong (error 1).
        eps = 1e-10
        trainError = min(max(trainError, eps), 1 - eps)
        # Standard AdaBoost vote weight: 1/2 * ln((1-err)/err).
        # (An earlier revision used 1/4, which scales the exp() weight
        # updates below and deviates from the textbook algorithm.)
        alpha = 0.5 * m.log((1 - trainError) / trainError)
        # Re-weight: up-weight misclassified examples, down-weight the rest.
        for item in data:
            if predict(currentTree, item, labels) != item[labels[-1]]:
                item['weight'] *= m.exp(alpha)
            else:
                item['weight'] *= m.exp(-alpha)
        treelist.append({'tree': currentTree, 'alpha': alpha})
    return treelist
def main():
    """Run the AdaBoost experiment on the bank dataset.

    For several iteration counts, boosts depth-2 stumps on the training set
    and prints train/test accuracy; then trains a 100-round ensemble and
    prints the per-stump accuracies.
    """
    trainset, trainraw = makeData(training, labels)
    testset, testraw = makeData(testing, labels)
    # Binarize numeric attributes against the training medians.
    medians = medianAssign(trainset, labels)
    trainset = removeNums(trainset, medians)
    testset = removeNums(testset, medians)
    # Initialize a uniform weight distribution (AdaBoost D_1).
    for element in trainset:
        element['weight'] = 1 / len(trainset)
    # Gold labels are the last field of each raw record.
    trainLabels = [item[-1] for item in trainraw]
    testLabels = [item[-1] for item in testraw]
    train = []
    test = []
    for i in [1, 2, 4, 8, 16, 20, 21]:
        # Reset the weights to uniform before each independent boosting run
        # (boost() mutates them in place).
        for element in trainset:
            element['weight'] = 1 / len(trainset)
        treeList = boost(trainset, labels, label_attr, labels[-1], i,
                         trainLabels)
        trainPred = []
        testPred = []
        for entry in trainset:
            trainPred.append(
                boostGuess(treeList, entry, labels, label_attr['outcome']))
        for entry in testset:
            testPred.append(
                boostGuess(treeList, entry, labels, label_attr['outcome']))
        trainAcc = accuracy(trainPred, trainLabels)
        # NOTE(review): accuracies below 0.3 are flipped to 1 - acc —
        # presumably compensating for an inverted label polarity in the
        # ensemble vote; confirm against boostGuess()/accuracy() semantics.
        if trainAcc < 0.3:
            trainAcc = 1 - trainAcc
        testAcc = accuracy(testPred, testLabels)
        if testAcc < 0.3:
            testAcc = 1 - testAcc
        train.append(trainAcc)
        test.append(testAcc)
        print("Boosted decision tree with", i,
              'iterations, has a training accuracy of', trainAcc)
        print("Boosted decision tree with", i,
              'iterations, has a testing accuracy of', testAcc)
    # Per-stump diagnostics: accuracy of each individual round's stump
    # from a 100-iteration boosting run (weights carry over from above).
    showtree = boost(trainset, labels, label_attr, labels[-1], 100,
                     trainLabels)
    stumpTrainAcc = []
    stumpTestAcc = []
    for tree in showtree:
        stumpTrainPred = []
        stumpTestPred = []
        for entry in trainset:
            stumpTrainPred.append(predict(tree['tree'], entry, labels))
        for entry in testset:
            stumpTestPred.append(predict(tree['tree'], entry, labels))
        stumpTrainAcc.append(accuracy(stumpTrainPred, trainLabels))
        stumpTestAcc.append(accuracy(stumpTestPred, testLabels))
    print('Training stumps', stumpTrainAcc)
    print('Testing stumps', stumpTestAcc)
def main():
    """Evaluate ID3 trees on the bank dataset, with and without 'unknown' values.

    Runs two identical sweeps over three split criteria and depths 1-16:
    first on the data as-is, then on copies where unknown attribute values
    have been replaced via fixUnknown()/replaceUnknown().
    """
    trainset, trainraw = makeData(training, labels)
    testset, testraw = makeData(testing, labels)
    # Binarize numeric attributes against the training medians.
    medians = medianAssign(trainset, labels)
    trainset = removeNums(trainset, medians)
    testset = removeNums(testset, medians)
    # Build the unknown-value replacements from the training set only.
    fix = fixUnknown(trainset, labels)
    trainsetU = replaceUnknown(trainset, labels, fix)
    testsetU = replaceUnknown(testset, labels, fix)
    trainLabels = [row[-1] for row in trainraw]
    testLabels = [row[-1] for row in testraw]

    def sweep(train_rows, test_rows):
        # One criterion-by-depth sweep, printing accuracies per combination.
        for item in ('gini', 'entropy', 'ME'):
            for i in range(1, 17):
                currentTree = id3(train_rows, labels, label_attr, labels[-1],
                                  i, item, None)
                trainAcc = accuracy(
                    [predict(currentTree, x, labels) for x in train_rows],
                    trainLabels)
                testAcc = accuracy(
                    [predict(currentTree, x, labels) for x in test_rows],
                    testLabels)
                print("Decision tree of depth", i, "using", item,
                      "has a test accuracy of", testAcc,
                      'and a training accuracy of', trainAcc)

    print(
        "Running decision tree algorithm on the bank dataset with unknown values"
    )
    sweep(trainset, testset)
    print(
        "Running decision tree algorithm on the bank dataset with unknown's replaced"
    )
    print("\n \n \n \n \n")
    sweep(trainsetU, testsetU)
def boostGuess(trees, data, labels, outcomes):
    """Weighted-majority vote of a boosted ensemble.

    Each entry of `trees` is {'tree': ..., 'alpha': ...}.  A stump voting
    outcomes[0] contributes +alpha, anything else -alpha; a strictly
    positive total elects outcomes[0], otherwise outcomes[1].
    """
    score = sum(
        entry['alpha']
        if predict(entry['tree'], data, labels) == outcomes[0]
        else -entry['alpha']
        for entry in trees)
    return outcomes[0] if score > 0 else outcomes[1]
def bag_guess(trees, data, labels, outcomes):
    """Plain-majority vote of a bagged ensemble.

    Each tree voting outcomes[0] counts +1, anything else -1; ties (and
    positive totals) elect outcomes[0], otherwise outcomes[1].
    """
    tally = 0
    for t in trees:
        tally += 1 if predict(t, data, labels) == outcomes[0] else -1
    return outcomes[0] if tally >= 0 else outcomes[1]
def result():
    """Flask handler: read integer form fields, classify with id3.tree,
    and render the human-readable mark band on result.html."""
    form_data = request.form.to_dict()
    # Form values arrive as strings; the model expects ints.
    for key in form_data:
        form_data[key] = int(form_data[key])
    print(form_data)
    final_result = str(id3.predict(form_data, id3.tree))
    # Map the numeric class label to its display band.
    # BUG FIX: the original "2.0" branch used '==' (a comparison) instead
    # of '=', so class "2.0" leaked through as the raw label string.
    bands = {
        "1.0": "Below 50%",
        "2.0": "50-60%",
        "3.0": "60-70%",
        "4.0": "70-80%",
        "5.0": "80-90%",
        "6.0": "Above 90%",
    }
    final_result = bands.get(final_result, final_result)
    print("The marks is " + final_result)
    return render_template('result.html', final_result=final_result)
# Render the weather decision tree to a PNG.
# NOTE(review): weatherGraph is defined earlier in the file — write() with
# prog/format kwargs looks like a pydot graph; confirm.
weatherGraph.write('./images/weather.png', prog=None, format='png')
print('Done.')

# ---- Car evaluation dataset: train an ID3 tree and measure test accuracy ----
print('===========================================')
print('Reading car evaluation training dataset...')
carAttributes, carEvaluationTrainDataSet = utils.readDataSet(
    './datasets/car-evaluation-train.csv')
# The last attribute is the prediction target; drop it from the feature list.
targetAttribute = carAttributes[-1]
carAttributes.remove(targetAttribute)

# Train
print('>>> Training car evaluation dataset...')
carTree = id3.id3(carAttributes, targetAttribute, carEvaluationTrainDataSet)
print(carTree)

# Test: count exact matches between the tree's prediction and the
# recorded target value for each test record.
print('Reading car evaluation test dataset')
_, testDataset = utils.readDataSet('./datasets/car-evaluation-test.csv')
counter = 0
testSize = len(testDataset)
print('>>> Testing...')
for d in testDataset:
    targetValue = id3.predict(d, carTree)
    if targetValue == d[targetAttribute]:
        counter += 1
ratio = counter / testSize * 100
print('Prediction accuracy: {:.2f}%'.format(ratio))
def main_slow():
    """Bagging / random-forest experiments on the bank dataset.

    Part 1: for attribute-subset sizes k in {2, 4, 6}, bags 1..1000
    random-feature ID3 trees and prints train/test accuracy curves.
    Part 2: trains 100 bagged ensembles of 1000 trees each and estimates
    bias/variance of single trees vs. bagged predictors on the test set.
    Very slow by design (hence the name).
    """
    trainset, trainraw = makeData(training, labels)
    testset, testraw = makeData(testing, labels)
    # Binarize numeric attributes against the training medians.
    medians = medianAssign(trainset, labels)
    trainset = removeNums(trainset, medians)
    testset = removeNums(testset, medians)
    # Uniform unit weights — rand_id3 presumably reads 'weight'; confirm.
    for element in trainset:
        element['weight'] = 1
    trainLabels = [item[-1] for item in trainraw]
    testLabels = [item[-1] for item in testraw]
    treenums = [1, 100, 200, 300, 500, 800, 1000]
    for k_val in [2, 4, 6]:
        print("When you set your attribute subset values to", k_val,
              "you get the following.")
        trainAcc = []
        testAcc = []
        for num in treenums:
            treelist = []
            for i in range(num):
                # Bootstrap sample (with replacement) for each tree.
                newTraining = rand.choices(trainset, k=len(trainset))
                newTree = rand_id3(newTraining, labels, label_attr,
                                   labels[-1], 18, 'entropy', None, k_val)
                treelist.append(newTree)
            trainPred = []
            testPred = []
            for entry in trainset:
                thing = bag_guess(treelist, entry, labels,
                                  label_attr['outcome'])
                trainPred.append(thing)
            for entry in testset:
                thing = bag_guess(treelist, entry, labels,
                                  label_attr["outcome"])
                testPred.append(thing)
            trainAcc.append(accuracy(trainPred, trainLabels))
            testAcc.append(accuracy(testPred, testLabels))
        print(trainAcc, 'train accuracy')
        print(testAcc, 'test accuracy')
    # Part 2: 100 ensembles of 1000 trees, each ensemble trained on
    # bootstrap resamples of a 1000-example subsample.
    tree_preds = []
    basics = []
    for i in range(100):
        train_i = rand.choices(trainset, k=1000)
        treelist_i = []
        for j in range(1000):
            train_j = rand.choices(train_i, k=1000)
            newTree = rand_id3(train_j, labels, label_attr, labels[-1], 18,
                               'entropy', None, 6)
            treelist_i.append(newTree)
            # if j % 100 == 0:
            #     print("100 more trees from set", i,
            #           'have been trained. iteration = ', j)
        tree_preds.append(treelist_i)
        # First tree of each ensemble doubles as a "single tree" baseline.
        basics.append(treelist_i[0])
        print("Tree set", i, "has been trained")
    # Bias/variance of the single-tree predictors, treating votes for
    # label_attr['outcome'][0] as 1 and anything else as 0.
    singleVar = []
    singleBias = []
    singleMean = []
    for entry in testset:
        guess_agg = 0
        predictions = []
        for tree in basics:
            guess = predict(tree, entry, labels)
            if guess == label_attr['outcome'][0]:
                guess_agg += 1
                predictions.append(1)
            else:
                predictions.append(0)
        ave = guess_agg / len(basics)
        singleMean.append(ave)
        # Ground truth encoded the same way — assumes entry['outcome']
        # holds the gold label; confirm against makeData().
        value = 0
        if entry['outcome'] == label_attr['outcome'][0]:
            value = 1
        bias = (value - ave)**2
        singleBias.append(bias)
        # Sample variance of the 0/1 predictions (n-1 denominator).
        subVar = []
        for h in predictions:
            mini = (h - ave)**2
            subVar.append(mini)
        var = (1 / (len(basics) - 1)) * sum(subVar)
        singleVar.append(var)
    # Same estimate for the full bagged ensembles.
    bagVar = []
    bagBias = []
    bagMean = []
    for entry in testset:
        guess_agg = 0
        predictions = []
        for trees in tree_preds:
            guess = bag_guess(trees, entry, labels, label_attr['outcome'])
            if guess == label_attr['outcome'][0]:
                guess_agg += 1
                predictions.append(1)
            else:
                predictions.append(0)
        # len(basics) == len(tree_preds) == 100, so this divisor matches
        # the number of ensemble predictions.
        ave = guess_agg / len(basics)
        bagMean.append(ave)
        value = 0
        if entry['outcome'] == label_attr['outcome'][0]:
            value = 1
        bias = (value - ave)**2
        bagBias.append(bias)
        subVar = []
        for h in predictions:
            mini = (h - ave)**2
            subVar.append(mini)
        var = (1 / (len(basics) - 1)) * sum(subVar)
        bagVar.append(var)
    # Aggregate and report: squared error decomposed as bias + variance.
    sVariance = mean(singleVar)
    sBias = mean(singleBias)
    sMSE = sBias + sVariance
    print("The bias and the variance of the single trees are: Variance:",
          sVariance, 'Bias:', sBias,
          "and the general squared error is:", sMSE)
    bVariance = mean(bagVar)
    bBias = mean(bagBias)
    bMSE = bBias + bVariance
    print("The bias and the variance of the bagged trees are: Variance:",
          bVariance, 'Bias:', bBias,
          "and the general squared error is:", bMSE)