Пример #1
0
def test_accuracy(rootNode):
    """Print the percentage of held-out test rows the tree handles correctly.

    Loads the test set through ``parse_training_data`` and counts, row by
    row, how many rows ``IsCompliant`` accepts for the given tree.

    Args:
        rootNode: root of the decision tree, passed unchanged to
            ``IsCompliant`` together with each test row.

    Returns:
        None. The result is written to standard output.
    """
    print("Testing Accuracy of the Algorithm")
    testCptTable = parse_training_data('data/featnames.csv',
                                       'data/testfeat.csv',
                                       'data/testlabs.csv')
    num_corrects = 0
    num_wrongs = 0
    for row in testCptTable:
        # A row counts as correct when IsCompliant accepts it for this tree.
        if IsCompliant(row, rootNode):
            num_corrects += 1
        else:
            num_wrongs += 1
    total_count = num_corrects + num_wrongs
    if total_count == 0:
        # An empty test set would otherwise raise ZeroDivisionError below.
        print("          No test rows found, accuracy cannot be computed")
        return
    # float() forces true division: under Python 2 the quotient of two ints
    # is floored, which would collapse every result to 0 or 100.
    accuracy = (float(num_corrects) / total_count) * 100
    print("          Accuracy Percentage =" + str(accuracy))
Пример #2
0
def test_accuracy(rootNode):
    """Report how many test rows the decision tree handles correctly.

    The test set is read with ``parse_training_data``; every row is checked
    against the tree with ``IsCompliant`` and the share of accepted rows is
    printed as a percentage.

    Args:
        rootNode: root of the decision tree, handed to ``IsCompliant``
            alongside each test row.

    Returns:
        None. Output goes to standard output only.
    """
    print("Testing Accuracy of the Algorithm")
    testCptTable = parse_training_data('data/featnames.csv',
                                       'data/testfeat.csv',
                                       'data/testlabs.csv')
    num_corrects = 0
    num_wrongs = 0
    for row in testCptTable:
        # Tally each test row as correct or wrong according to IsCompliant.
        if not IsCompliant(row, rootNode):
            num_wrongs += 1
        else:
            num_corrects += 1
    total_count = num_corrects + num_wrongs
    if total_count == 0:
        # Guard the division below: no rows means there is nothing to score.
        print("          No test rows found, accuracy cannot be computed")
        return
    # Convert to float first so Python 2 does not floor the int/int quotient
    # (which would report only 0 or 100).
    accuracy = (float(num_corrects) / total_count) * 100
    print("          Accuracy Percentage =" + str(accuracy))
Пример #3
0
def main():
    """Interactively build an ID3 decision tree, then report accuracy and size.

    Asks whether chi-square should be used (and, if so, for a p value of
    0.01, 0.05 or 1), parses the training data, builds the tree with
    ``build_id3``, and finally calls ``test_accuracy`` and ``calculatespace``.

    Module-level state written: ``p_value`` (only when chi-square is chosen),
    ``total_size`` (reset to 0) and ``total_num_rows``.

    Returns:
        None. Returns early, after printing a message, on an invalid menu
        choice or p value.
    """
    global p_value, total_size, total_num_rows

    # Python 2's input() evaluates whatever the user types (unsafe), while
    # Python 3's input() returns a str that never equals the ints 1 or 2.
    # Read raw text on both versions and parse it explicitly instead.
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input      # Python 3

    def _read_number(prompt):
        """Return the typed int or float, or None if it is not a number."""
        text = str(read_line(prompt)).strip()
        for convert in (int, float):
            try:
                return convert(text)
            except ValueError:
                continue
        return None

    print(" Please choose if you want to use chi-square:")
    print(" 1. Do not use Chi-Square")
    print(" 2. Use Chi-Square")
    userInput = _read_number(" Enter your choice:")
    useChiSq = False
    if userInput == 1:
        useChiSq = False
    elif userInput == 2:
        userPvalue = _read_number(" Enter p value, Valid values (0.01|0.05|1):")
        if userPvalue not in (0.01, 0.05, 1):
            print(" Please enter a valid P value, exiting")
            return
        # int-before-float parsing keeps str() output as '1' for "1" and
        # '0.05' for "0.05", matching what the evaluated input produced.
        p_value = str(userPvalue)
        useChiSq = True
    else:
        print(" Please make a valid choice, exiting ")
        return
    total_size = 0
    # NOTE(review): when chi-square is disabled p_value is not assigned here;
    # this line presumably relies on a module-level default - confirm.
    print(" Given p value " + str(p_value))
    # CPT table is a list of rows with attributes and values, approximately of 40k size
    print(" Parsing Training Data")
    cptTable = parse_training_data('data/featnames.csv', 'data/trainfeat.csv',
                                   'data/trainlabs.csv')
    total_num_rows = len(cptTable)
    # list() is required on Python 3, where keys() is a view without remove().
    attributes = list(cptTable[0].keys())
    # 'nextPage' is dropped from the candidate attributes given to build_id3.
    attributes.remove('nextPage')
    print(" Building decision tree")
    root_node = build_id3(cptTable, attributes, useChiSq)
    test_accuracy(root_node)
    calculatespace(root_node)
    print("Total Size (Nodes in decision tree)=" + str(total_size))
Пример #4
0
def main():
    """Run the interactive ID3 workflow: configure, train, evaluate, size.

    The user picks whether to use chi-square and, if so, a p value
    (0.01, 0.05 or 1). Training data is parsed, ``build_id3`` builds the
    tree, and ``test_accuracy`` and ``calculatespace`` are run on the result.

    Module-level state written: ``p_value`` (only when chi-square is chosen),
    ``total_size`` (reset to 0) and ``total_num_rows``.

    Returns:
        None. Prints a message and returns early if the menu choice or the
        p value is not one of the accepted values.
    """
    global p_value, total_size, total_num_rows

    # input() is eval-based on Python 2 and returns str on Python 3, so the
    # original int comparisons only worked (unsafely) on Python 2. Use the
    # raw-text reader of whichever version is running and parse explicitly.
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input      # Python 3

    def _read_number(prompt):
        """Return the typed int or float, or None if it is not a number."""
        text = str(read_line(prompt)).strip()
        for convert in (int, float):
            try:
                return convert(text)
            except ValueError:
                continue
        return None

    print(" Please choose if you want to use chi-square:")
    print(" 1. Do not use Chi-Square")
    print(" 2. Use Chi-Square")
    userInput = _read_number(" Enter your choice:")
    useChiSq = False
    if userInput == 1:
        useChiSq = False
    elif userInput == 2:
        userPvalue = _read_number(" Enter p value, Valid values (0.01|0.05|1):")
        if userPvalue not in (0.01, 0.05, 1):
            print(" Please enter a valid P value, exiting")
            return
        # Trying int before float keeps the stored text as '1' for "1" and
        # '0.05' for "0.05", the same strings the evaluated input yielded.
        p_value = str(userPvalue)
        useChiSq = True
    else:
        print(" Please make a valid choice, exiting ")
        return
    total_size = 0
    # NOTE(review): p_value is not assigned on the no-chi-square path; this
    # presumably depends on a module-level default existing - confirm.
    print(" Given p value " + str(p_value))
    # CPT table is a list of rows with attributes and values, approximately of 40k size
    print(" Parsing Training Data")
    cptTable = parse_training_data('data/featnames.csv', 'data/trainfeat.csv', 'data/trainlabs.csv')
    total_num_rows = len(cptTable)
    # Copy the keys into a list: Python 3 key views have no remove().
    attributes = list(cptTable[0].keys())
    # 'nextPage' is removed from the candidate attributes given to build_id3.
    attributes.remove('nextPage')
    print(" Building decision tree")
    root_node = build_id3(cptTable, attributes, useChiSq)
    test_accuracy(root_node)
    calculatespace(root_node)
    print("Total Size (Nodes in decision tree)=" + str(total_size))