-
Notifications
You must be signed in to change notification settings - Fork 0
/
predictions.py
95 lines (85 loc) · 3.43 KB
/
predictions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#THIS FILE CONTAINS FUNCTIONS WHICH ARE USED TO GENERATE PREDICTIONS GIVEN TREES
import classify as c
import numpy as np
#generates an array of predictions as specified in the CBC manual
#output has shape (numberofexamples, 1), same as the input
def testTrees(treeSet, testSet, attributes, tieBreaker):
binaryClassification = []
sampleSize = []
distribution = []
for i in range(len(testSet)):
row = []
for j in range(len(treeSet)):
row.append(c.classify(treeSet[j], testSet[i], attributes)[0][0])
binaryClassification.append(row)
binaryClassification = np.vstack(binaryClassification)
for i in range(len(testSet)):
row = []
for j in range(len(treeSet)):
row.append(c.classify(treeSet[j], testSet[i], attributes)[1][0])
sampleSize.append(row)
sampleSize = np.vstack(sampleSize)
for i in range(len(testSet)):
row = []
for j in range(len(treeSet)):
row.append(c.classify(treeSet[j], testSet[i], attributes)[2][0])
distribution.append(row)
distribution = np.vstack(distribution)
output = finalPredictions(binaryClassification, sampleSize, distribution, tieBreaker)
return output
#finalPredictions generates final predictions by combining binary predictions from 6 trees
#output is an array of numbers from 1-6, each representing the predicted emotion
def finalPredictions(binaryPredictions, sampleSize, sampleDistribution, tieBreaker):
    """Combine per-tree binary predictions into one class label per example.

    For each row of ``binaryPredictions`` the trees that voted 1 are
    collected, then:

    * exactly one positive tree: that tree's index + 1 is the prediction;
    * several positive trees: the tie is resolved with ``tieBreakerBySize``
      (``tieBreaker == 1``, using the row of ``sampleSize``) or with
      ``tieBreakerByDistribution`` (``tieBreaker == 2``, using the row of
      ``sampleDistribution``);
    * no positive tree: the tree with the smallest sample size is chosen.

    Args:
        binaryPredictions: table with one row per example and one column
            per tree; an entry equal to 1 is a positive vote.
        sampleSize: table of the same layout holding sample sizes.
        sampleDistribution: table of the same layout holding distributions.
        tieBreaker: 1 or 2, selecting the tie-breaking method.

    Returns:
        Array of shape (number of examples, 1) holding 1-based labels.
        Empty input yields an empty (0, 1) array.

    Raises:
        ValueError: if a tie has to be resolved and ``tieBreaker`` is
            neither 1 nor 2.
    """
    output = []
    for i, votes in enumerate(binaryPredictions):
        positives = [j for j, vote in enumerate(votes) if vote == 1]
        if len(positives) == 1:
            output.append(positives[0] + 1)
        elif len(positives) > 1:
            if tieBreaker == 1:  # method 1 of tie breaking
                winner = tieBreakerBySize(positives, sampleSize[i])
            elif tieBreaker == 2:  # method 2 of tie breaking
                # Method 2 must look at the distribution row, not sample sizes.
                winner = tieBreakerByDistribution(positives, sampleDistribution[i])
            else:
                # Skipping the row would leave the output shorter than the
                # input and shift every later prediction, so fail loudly.
                raise ValueError(
                    f"Unknown tieBreaker {tieBreaker!r}; expected 1 or 2"
                )
            output.append(winner + 1)
        else:
            # Every tree voted negative: pick the smallest sample size.
            output.append(findMin(sampleSize[i]) + 1)

    # np.vstack cannot stack an empty list.
    if not output:
        return np.empty((0, 1), dtype=int)
    return np.vstack(output)
#returns the index containing the largest number in an array
def findMax(array):
    """Return the index of the largest element of ``array``.

    When the largest value occurs more than once, the index of its first
    occurrence is returned.

    Raises:
        IndexError: if ``array`` is empty.
    """
    best_index = 0
    best_value = array[0]
    for index, value in enumerate(array):
        # Strict comparison keeps the earliest index on equal values.
        if value > best_value:
            best_index, best_value = index, value
    return best_index
#returns the index containing the smallest number in an array
def findMin(array):
    """Return the index of the smallest element of ``array``.

    When the smallest value occurs more than once, the index of its first
    occurrence is returned.

    Raises:
        IndexError: if ``array`` is empty.
    """
    best_index = 0
    best_value = array[0]
    for index, value in enumerate(array):
        # Strict comparison keeps the earliest index on equal values.
        if value < best_value:
            best_index, best_value = index, value
    return best_index
#counts frequency of examples at the leaf and returns the classification with the
#highest frequency of examples
def tieBreakerBySize(positives, sampleSize):
    """Pick, among the positive trees, the one with the largest sample size.

    Args:
        positives: indices of the trees that classified the example as
            positive.
        sampleSize: sample size per tree, indexed by tree index.

    Returns:
        The element of ``positives`` whose sample size is largest. If
        several share the largest size, the one listed first wins.

    Raises:
        ValueError: if ``positives`` is empty.
    """
    # Only trees that voted positive may win the tie; searching the whole
    # sampleSize array could return a tree that voted negative.
    return max(positives, key=lambda tree: sampleSize[tree])
#counts distribution of examples at the leaf and returns the classification
#supported by the strongest majority
def tieBreakerByDistribution(positives, sampleDistribution):
    """Pick, among the positive trees, the one with the largest distribution value.

    Args:
        positives: indices of the trees that classified the example as
            positive.
        sampleDistribution: distribution value per tree, indexed by tree
            index.

    Returns:
        The element of ``positives`` whose distribution value is largest.
        If several share the largest value, the one listed first wins.

    Raises:
        ValueError: if ``positives`` is empty.
    """
    # Only trees that voted positive may win the tie; searching the whole
    # array could return a tree that voted negative.
    return max(positives, key=lambda tree: sampleDistribution[tree])
#when all trees classify an example as negative, weakestNegative chooses
#the tree with the weakest (smallest) sample size at the leaf
def weakestNegative(sampleSize):
    """Return the index of the tree with the smallest sample size.

    Intended for examples that every tree classified as negative; the
    lookup itself is delegated to ``findMin``.
    """
    weakest_index = findMin(sampleSize)
    return weakest_index