def main(argv):
    """Run the logistic-regression pipeline on a train file and a test file.

    The rows of the training file are passed to ``logistic_Regression`` and
    its result, together with the rows of the test file, is handed to
    ``test_Result``.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-t/--train <trainfile>``, ``-e/--test <testfile>``
            and ``-h``.

    Exits with status 2 after printing the usage line when the options
    cannot be parsed or one of the two files is missing. ``-h`` prints the
    help line and exits normally.
    """
    setpath()
    usage = '\nusage: run.py -t <trainfile> -e <testfile> \n'

    try:
        opts, _ = getopt.getopt(argv, "ht:e:", ["train=", "test="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    trainfile = None
    testfile = None
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg

    # Required options are checked on the parsed values. Counting the
    # entries of sys.argv rejected the valid ``--train=FILE --test=FILE``
    # spelling, made ``-h`` unreachable and ignored the ``argv`` parameter.
    if trainfile is None or testfile is None:
        print(usage)
        sys.exit(2)

    # Imported after setpath(), as before (presumably setpath() is what
    # makes this module importable -- confirm).
    from file_reader import FileReader

    training_Set = FileReader(trainfile).getRows()
    # Read the test file and create its matrix.
    testing_Set = FileReader(testfile).getRows()
    test_Result(logistic_Regression(training_Set), testing_Set)
def main(argv):
    """Run the logistic-regression pipeline on a train file and a test file.

    The rows of the training file are passed to ``logistic_Regression`` and
    its result, together with the rows of the test file, is handed to
    ``test_Result``.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-t/--train <trainfile>``, ``-e/--test <testfile>``
            and ``-h``.

    Exits with status 2 after printing the usage line when the options
    cannot be parsed or one of the two files is missing. ``-h`` prints the
    help line and exits normally.
    """
    setpath()
    usage = '\nusage: run.py -t <trainfile> -e <testfile> \n'

    try:
        opts, _ = getopt.getopt(argv, "ht:e:", ["train=", "test="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    trainfile = None
    testfile = None
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg

    # Required options are checked on the parsed values. Counting the
    # entries of sys.argv rejected the valid ``--train=FILE --test=FILE``
    # spelling, made ``-h`` unreachable and ignored the ``argv`` parameter.
    if trainfile is None or testfile is None:
        print(usage)
        sys.exit(2)

    # Imported after setpath(), as before (presumably setpath() is what
    # makes this module importable -- confirm).
    from file_reader import FileReader

    training_Set = FileReader(trainfile).getRows()
    # Read the test file and create its matrix.
    testing_Set = FileReader(testfile).getRows()
    test_Result(logistic_Regression(training_Set), testing_Set)
def main(argv):
    """Evaluate a NaiveBayes classifier on a test file and print a report.

    A ``NaiveBayes`` object is built from the training file name; every row
    returned by ``FileReader(testfile).getRows()`` is classified with
    ``binary_classify`` and compared with the row's last element. Rows whose
    last element is ``'1'`` are counted as "Yes", all others as "No".
    Printed output: one line per misclassified row, the error count, the
    accuracy in percent and a 2x2 confusion matrix (rows: actual label,
    columns: predicted label).

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-t/--train <trainfile>``, ``-e/--test <testfile>``
            and ``-h``.

    Exits with status 2 after printing the usage line when the options
    cannot be parsed or one of the two files is missing, and with status 1
    when the test file yields no rows. ``-h`` prints the help line and exits
    normally.
    """
    setpath()
    usage = '\nusage: run.py -t <trainfile> -e <testfile>\n'

    try:
        opts, _ = getopt.getopt(argv, "ht:e:", ["train=", "test="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    trainfile = None
    testfile = None
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg

    # Required options are checked on the parsed values. Counting the
    # entries of sys.argv rejected the valid ``--train=FILE --test=FILE``
    # spelling, made ``-h`` unreachable and ignored the ``argv`` parameter.
    if trainfile is None or testfile is None:
        print(usage)
        sys.exit(2)

    # Imported after setpath(), as before (presumably setpath() is what
    # makes these modules importable -- confirm).
    from file_reader import FileReader
    from naive_bayes import NaiveBayes

    nb = NaiveBayes(trainfile)
    testData = FileReader(testfile).getRows()

    total = len(testData)
    if total == 0:
        # The accuracy below divides by the number of test rows.
        print('Test file has no rows; nothing to evaluate.')
        sys.exit(1)

    true_positive = 0
    false_positive = 0
    true_negative = 0
    false_negative = 0

    # Testing phase
    for idx, row in enumerate(testData):
        actual = row[-1]
        prediction = nb.binary_classify(row)
        if actual != prediction:
            print("Error on row: %s" % str(idx + 1))
            if actual == '1':
                false_negative += 1
            else:
                false_positive += 1
        elif actual == '0':
            true_negative += 1
        else:
            true_positive += 1

    # Every misclassified row lands in exactly one of these two cells, so
    # the error count is always an int (previously 0 or a float like 3.0).
    num_errors = false_negative + false_positive

    print('\n\n--------------Error Count----------------')
    print(num_errors)
    print('\n\n--------------Accuracy----------------')
    print("\n\nThe Accuracy is " + str((total - num_errors) * 100.0 / total) + "%")
    print("\n===========The confusion matrix===========")
    print("\t No \t Yes")
    print("No \t", str(true_negative) + "\t", str(false_positive))
    print("Yes \t", str(false_negative) + "\t", str(true_positive))
def main(argv):
    """Evaluate a NaiveBayes classifier on a test file and print a report.

    A ``NaiveBayes`` object is built from the training file name; every row
    returned by ``FileReader(testfile).getRows()`` is classified with
    ``binary_classify`` and compared with the row's last element. Rows whose
    last element is ``'1'`` are counted as "Yes", all others as "No".
    Printed output: one line per misclassified row, the error count, the
    accuracy in percent and a 2x2 confusion matrix (rows: actual label,
    columns: predicted label).

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-t/--train <trainfile>``, ``-e/--test <testfile>``
            and ``-h``.

    Exits with status 2 after printing the usage line when the options
    cannot be parsed or one of the two files is missing, and with status 1
    when the test file yields no rows. ``-h`` prints the help line and exits
    normally.
    """
    setpath()
    usage = '\nusage: run.py -t <trainfile> -e <testfile>\n'

    try:
        opts, _ = getopt.getopt(argv, "ht:e:", ["train=", "test="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    trainfile = None
    testfile = None
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg

    # Required options are checked on the parsed values. Counting the
    # entries of sys.argv rejected the valid ``--train=FILE --test=FILE``
    # spelling, made ``-h`` unreachable and ignored the ``argv`` parameter.
    if trainfile is None or testfile is None:
        print(usage)
        sys.exit(2)

    # Imported after setpath(), as before (presumably setpath() is what
    # makes these modules importable -- confirm).
    from file_reader import FileReader
    from naive_bayes import NaiveBayes

    nb = NaiveBayes(trainfile)
    testData = FileReader(testfile).getRows()

    total = len(testData)
    if total == 0:
        # The accuracy below divides by the number of test rows.
        print('Test file has no rows; nothing to evaluate.')
        sys.exit(1)

    true_positive = 0
    false_positive = 0
    true_negative = 0
    false_negative = 0

    # Testing phase
    for idx, row in enumerate(testData):
        actual = row[-1]
        prediction = nb.binary_classify(row)
        if actual != prediction:
            print("Error on row: %s" % str(idx + 1))
            if actual == '1':
                false_negative += 1
            else:
                false_positive += 1
        elif actual == '0':
            true_negative += 1
        else:
            true_positive += 1

    # Every misclassified row lands in exactly one of these two cells, so
    # the error count is always an int (previously 0 or a float like 3.0).
    num_errors = false_negative + false_positive

    print('\n\n--------------Error Count----------------')
    print(num_errors)
    print('\n\n--------------Accuracy----------------')
    print("\n\nThe Accuracy is " + str((total - num_errors) * 100.0 / total) + "%")
    print("\n===========The confusion matrix===========")
    print("\t No \t Yes")
    print("No \t", str(true_negative) + "\t", str(false_positive))
    print("Yes \t", str(false_negative) + "\t", str(true_positive))
def calcCounts(self):
    """Count labels and per-label feature values in ``self._file``.

    Rows come from ``FileReader(self._file).getRows()``. The last element of
    a row is used as its label and the first ``len(rows[0]) - 1`` elements
    as feature values. As a side effect ``self._num_features`` is set to
    that feature count.

    Returns:
        A ``(label_count, feature_count)`` tuple:

        * ``label_count[label]`` is the number of rows carrying ``label``;
          ``label_count['total']`` is the number of rows.
        * ``feature_count[label][str(i)][value]`` is the number of rows with
          ``label`` whose i-th element equals ``value``. Values that only
          occur under other labels are present with a count of 0.
          ``feature_count[label][str(i)]['total']`` is the number of rows
          with ``label``.

    NOTE: ``'total'`` shares a dict with real labels / feature values, so
    data containing the literal ``'total'`` is merged with that counter.
    ``rows[0]`` is read unconditionally, so a file without rows fails there.
    """
    # Missing labels not yet added.
    rows = FileReader(self._file).getRows()
    self._num_features = len(rows[0]) - 1
    num_features = self._num_features

    label_count = {'total': 0}
    feature_count = {}

    for row in rows:
        label = row[-1]
        label_count['total'] += 1
        label_count[label] = label_count.get(label, 0) + 1

        per_feature = feature_count.get(label)
        if per_feature is None:
            # First row with this label: create one counter per feature.
            per_feature = {}
            for i in range(num_features):
                per_feature[str(i)] = {'total': 0}
            feature_count[label] = per_feature

        for i in range(num_features):
            counts = per_feature[str(i)]
            counts[row[i]] = counts.get(row[i], 0) + 1
            counts['total'] += 1

    # Give every label an explicit 0 for feature values it never showed.
    # The set of observed values depends only on the feature, so it is
    # built once per feature instead of once per (label, feature) pair.
    for i in range(num_features):
        feature = str(i)
        observed = set()
        for label in feature_count:
            if label != 'total':
                observed.update(feature_count[label][feature])
        for label in feature_count:
            counts = feature_count[label][feature]
            for value in observed:
                counts.setdefault(value, 0)

    return (label_count, feature_count)
def calcCounts(self):
    """Count labels and per-label feature values in ``self._file``.

    Rows come from ``FileReader(self._file).getRows()``. The last element of
    a row is used as its label and the first ``len(rows[0]) - 1`` elements
    as feature values. As a side effect ``self._num_features`` is set to
    that feature count.

    Returns:
        A ``(label_count, feature_count)`` tuple:

        * ``label_count[label]`` is the number of rows carrying ``label``;
          ``label_count['total']`` is the number of rows.
        * ``feature_count[label][str(i)][value]`` is the number of rows with
          ``label`` whose i-th element equals ``value``. Values that only
          occur under other labels are present with a count of 0.
          ``feature_count[label][str(i)]['total']`` is the number of rows
          with ``label``.

    NOTE: ``'total'`` shares a dict with real labels / feature values, so
    data containing the literal ``'total'`` is merged with that counter.
    ``rows[0]`` is read unconditionally, so a file without rows fails there.
    """
    # Missing labels not yet added.
    rows = FileReader(self._file).getRows()
    self._num_features = len(rows[0]) - 1
    num_features = self._num_features

    label_count = {'total': 0}
    feature_count = {}

    for row in rows:
        label = row[-1]
        label_count['total'] += 1
        label_count[label] = label_count.get(label, 0) + 1

        per_feature = feature_count.get(label)
        if per_feature is None:
            # First row with this label: create one counter per feature.
            per_feature = {}
            for i in range(num_features):
                per_feature[str(i)] = {'total': 0}
            feature_count[label] = per_feature

        for i in range(num_features):
            counts = per_feature[str(i)]
            counts[row[i]] = counts.get(row[i], 0) + 1
            counts['total'] += 1

    # Give every label an explicit 0 for feature values it never showed.
    # The set of observed values depends only on the feature, so it is
    # built once per feature instead of once per (label, feature) pair.
    for i in range(num_features):
        feature = str(i)
        observed = set()
        for label in feature_count:
            if label != 'total':
                observed.update(feature_count[label][feature])
        for label in feature_count:
            counts = feature_count[label][feature]
            for value in observed:
                counts.setdefault(value, 0)

    return (label_count, feature_count)
def main(argv):
    """Build a decision tree from a train file and evaluate it on a test file.

    A ``DecisionTreeBuilder`` is created from the training rows and built
    with the requested maximum depth; each test row is then passed to
    ``predict`` and compared with the row's last element. Printed output:
    the feature names, the tree height, one actual/predicted line per test
    row, the confusion matrix, the error count and the accuracy as a
    fraction between 0 and 1.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-t/--train <trainfile>``, ``-e/--test <testfile>``,
            ``-d/--maxDepth <maxDepth>`` and ``-h``.

    Exits with status 2 after printing the usage line when the options
    cannot be parsed, one of the three values is missing or the depth is
    not an integer, and with status 1 when the test file yields no rows.
    ``-h`` prints the help line and exits normally.
    """
    setpath()
    usage = '\nusage: run.py -t <trainfile> -e <testfile> -d <maxDepth>\n'

    try:
        opts, _ = getopt.getopt(argv, "ht:e:d:", ["train=", "test=", "maxDepth="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    trainfile = None
    testfile = None
    maxDepth = None
    for opt, arg in opts:
        if opt == '-h':
            print('run.py -t <trainfile> -e <testfile> -d <maxDepth>')
            sys.exit()
        elif opt in ("-t", "--train"):
            trainfile = arg
        elif opt in ("-e", "--test"):
            testfile = arg
        elif opt in ("-d", "--maxDepth"):
            try:
                maxDepth = int(arg)
            except ValueError:
                # A non-numeric depth is a usage error, not a traceback.
                print(usage)
                sys.exit(2)

    # Required options are checked on the parsed values. Counting the
    # entries of sys.argv rejected the valid ``--train=FILE`` spelling,
    # made ``-h`` unreachable and ignored the ``argv`` parameter.
    if trainfile is None or testfile is None or maxDepth is None:
        print(usage)
        sys.exit(2)

    # Imported after setpath(), as before (presumably setpath() is what
    # makes these modules importable -- confirm).
    from file_reader import FileReader
    from decision_tree_builder import DecisionTreeBuilder

    fr = FileReader(trainfile)
    # getRows() returns a dataMatrix;
    dtb = DecisionTreeBuilder(fr.getRows())
    print('Features: {}'.format(fr.featureNames))
    dtb.build(maxDepth)
    print('Tree Building Complete and Successful')
    print('Height of the tree is {}'.format(dtb.decisionTree.height()))

    # Testing section
    # Zero-initialised square confusion matrix, one row/column per label.
    num_labels = len(fr.getClassLabels())
    confusion_matrix = [[0] * num_labels for _ in range(num_labels)]

    # Read the test file.
    dataMatrix_testFile = FileReader(testfile).getRows()
    total_records = len(dataMatrix_testFile)
    if total_records == 0:
        # The accuracy below divides by the number of test rows.
        print('Test file has no rows; nothing to evaluate.')
        sys.exit(1)

    # Testing phase
    error_count = 0
    for row in dataMatrix_testFile:
        actual_label = row[len(row) - 1]
        predicted_classLabel = dtb.predict(row)
        print('\tActual Label is {}, and Predicted Label is {}'.format(
            actual_label, predicted_classLabel))
        # To fill the confusion matrix for the zoo data set, uncomment:
        # confusion_matrix[int(actual_label) - 1][int(predicted_classLabel) - 1] += 1
        if not actual_label == predicted_classLabel:
            error_count += 1

    print('\n\n------------------Confusion Matrix----------')
    for matrix_row in confusion_matrix:
        print(matrix_row)
    print('\n\n--------------Error Count----------------')
    print(error_count)
    print('\n\n--------------Accuracy----------------')
    # float() keeps this a true division on Python 2 as well.
    print((total_records - error_count) / float(total_records))