import numpy as np  # needed for the array operations below (assumed imported at the top of the original file)

# trainDataArray/trainResultsArray, testDataArray/testResultsArray,
# numTrainData, classL, Tree and H are assumed to be defined earlier in the script
# debug prints (left disabled)
#print(trainDataArray)
#print(trainResultsArray)
#print(trainDataArray.sum(axis=0))
#print(testDataArray)
#print(testResultsArray)

# start training tree
numLabelY = np.sum(trainResultsArray) # num of Yes in Label
numLabelN = numTrainData - numLabelY

trainTree = Tree(trainDataArray, trainResultsArray, classL, testDataArray, 
	testResultsArray)
# first output
#print('[%d+/%d-]' % (numLabelY,numLabelN))
trainTree.setLabelDist([numLabelY, numLabelN])
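# e.g. with 9 Yes and 5 No training labels (illustrative numbers), the root's
# distribution would render as [9+/5-] via the commented-out print format above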

numAttrYArray = trainDataArray.sum(axis=0) # num of Yes (1's) per attribute
pAttrYArray = numAttrYArray/numTrainData # probability of Yes per attribute
pLabelY = numLabelY/numTrainData # probability of Yes in Label 

# calculate entropy for each attribute
HAttrArray = np.zeros([classL.numAttr])
for i in range(classL.numAttr):
	HAttrArray[i] = H(pAttrYArray[i])

HLabel = H(pLabelY) # calculate entropy for label
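# H(p) is assumed to be a binary-entropy helper defined earlier in the script;
# a minimal sketch of such a function (illustrative only, not the original):
#
#   def H(p):
#       if p <= 0 or p >= 1:
#           return 0.0  # a pure split carries no uncertainty
#       return -p * np.log2(p) - (1 - p) * np.log2(1 - p)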

# for the root node, choose the split by mutual information:
# MI(Y;X) = H(Y) - H(Y|X), with Y = attribute and X = label
# (MI is symmetric, so this equals the usual information gain
# H(label) - H(label|attribute))
# calculate max MI
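# A minimal sketch of the max-MI step that typically follows, matching the
# comment above (Y = attribute, X = label) and assuming trainResultsArray is
# a flat 0/1 vector; MIArray and bestAttr are illustrative names, not from
# the original source:
#
#   MIArray = np.zeros([classL.numAttr])
#   for i in range(classL.numAttr):
#       attrColY = trainDataArray[trainResultsArray == 1, i]  # rows labeled Yes
#       attrColN = trainDataArray[trainResultsArray == 0, i]  # rows labeled No
#       HCondY = H(attrColY.mean()) if attrColY.size else 0.0
#       HCondN = H(attrColN.mean()) if attrColN.size else 0.0
#       # H(Y|X) = P(label=Yes)*H(Y|Yes) + P(label=No)*H(Y|No)
#       MIArray[i] = HAttrArray[i] - (pLabelY * HCondY + (1 - pLabelY) * HCondN)
#   bestAttr = np.argmax(MIArray)  # attribute with maximum mutual information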