def pruneTree(self):
    """Search for a pruned tree that scores better than the current one.

    Runs a fixed number of rounds. Each round deep-copies the best tree found
    so far, turns up to ``self.nodeNum`` nodes (picked by
    ``self.randomNumberGenerator()``) into leaves, and scores the result with
    a ``validation`` handler built on ``self.filepath``. A candidate replaces
    the best tree only when its accuracy is strictly higher.

    Side effects: resets ``self.maxAttr``; on improvement updates
    ``self.maxAccuracy`` and ``self.maxAttr``; finally stores the best tree
    in ``self.tree.tree`` and ``self.treeroot``.
    """
    rounds = 100  # fixed number of pruning attempts
    self.maxAttr = []

    best_root = self.treeroot
    # Baseline accuracy of the unpruned tree.
    best_accuracy = validation(self.filepath, best_root).calculateAccuracy()
    # A single handler is reused for every candidate; only its root changes.
    candidate_validator = validation(self.filepath, None)

    for _ in range(rounds):
        candidate_root = copy.deepcopy(best_root)
        candidate_validator.setTreeRoot(candidate_root)

        pruned_nodes = []
        for _ in range(self.nodeNum):
            # Refresh self.treeList so it reflects the nodes cut so far.
            self.treeToList(candidate_root)
            if len(self.treeList) <= 3:
                break
            node = self.treeList[self.randomNumberGenerator()]
            # Cutting both children and flagging the node makes it a leaf.
            node.isLeaf = 1
            node.left = None
            node.right = None
            pruned_nodes.append(node)

        candidate_accuracy = candidate_validator.calculateAccuracy()
        if candidate_accuracy > best_accuracy:
            best_accuracy = candidate_accuracy
            self.maxAccuracy = candidate_accuracy
            best_root = candidate_root
            self.maxAttr = [
                self.tree.attrset[cut.attr] for cut in pruned_nodes
            ]

    # Publish the winner back onto the training tree.
    self.tree.tree = best_root
    self.treeroot = best_root
def pruneTree(self):
    """Try random prunings and keep the one with the best accuracy.

    For each of a fixed number of attempts, the current best tree is
    deep-copied, up to ``self.nodeNum`` nodes chosen through
    ``self.randomNumberGenerator()`` are converted to leaves, and the
    candidate is scored by a ``validation`` handler created from
    ``self.filepath``. Only a strictly higher accuracy replaces the best tree.

    Side effects: clears ``self.maxAttr``; on improvement sets
    ``self.maxAccuracy`` and ``self.maxAttr``; at the end assigns the best
    tree to ``self.tree.tree`` and ``self.treeroot``.
    """
    attempts = 100  # fixed number of pruning attempts
    self.maxAttr = []

    winner = self.treeroot
    baseline = validation(self.filepath, winner)
    scorer = validation(self.filepath, None)
    top_score = baseline.calculateAccuracy()

    for _ in range(attempts):
        trial = copy.deepcopy(winner)
        scorer.setTreeRoot(trial)
        removed = []

        for _ in range(self.nodeNum):
            # Rebuild the flat node list after every cut.
            self.treeToList(trial)
            if len(self.treeList) <= 3:
                break
            victim = self.treeList[self.randomNumberGenerator()]
            # A node with no children and isLeaf set acts as a leaf.
            victim.isLeaf = 1
            victim.left = None
            victim.right = None
            removed.append(victim)

        score = scorer.calculateAccuracy()
        if score > top_score:
            self.maxAccuracy = score
            top_score = score
            winner = trial
            self.maxAttr = [self.tree.attrset[node.attr] for node in removed]

    # Replace the original training tree with the best one found.
    self.tree.tree = winner
    self.treeroot = winner
def printAccuracy(self, testPath, filehandle):
    """Write the post-pruning accuracy report to ``filehandle``.

    A ``validation`` handler is built from ``testPath`` and ``self.treeroot``.
    The report lists ``self.nodeNum``, ``self.levelcontrol``, ``self.maxAttr``
    and the handler's ``calculateAccuracy()`` result formatted as a
    percentage.

    Args:
        testPath: Path handed to ``validation`` for scoring the tree.
        filehandle: Writable text stream that receives the report.

    stdout is redirected only while the report is written. Unlike a bare
    ``sys.stdout = filehandle`` / ``sys.stdout = sys.__stdout__`` pair, the
    previous stream is restored even if an exception is raised, and a
    redirection installed by the caller is not clobbered.
    """
    from contextlib import redirect_stdout

    tmp = validation(testPath, self.treeroot)
    # Anything printed while the accuracy is computed still goes to the file.
    with redirect_stdout(filehandle):
        print('\n|-----------------------------------------------------------------------------------------------|')
        print('\n |--number of nodes tried to prune: ', self.nodeNum,
              '--|--starting node number: ', self.levelcontrol, ' --|')
        print('\n Node selected to prune: ', self.maxAttr)
        print(' The prediction accuracy on given data set after pruning the tree is ',
              format(tmp.calculateAccuracy(), '5.2%'))
        print('|-----------------------------------------------------------------------------------------------|\n')
def test_sign(self):
    """A message without a signature reports only the missing-signature error."""
    from Validation import validation

    letter = {
        'email_id': r'"Иван Иванович" <*****@*****.**>',
        'body_of_msg': r'Добрый день, вот лабораторная http://github.com',
        'head_of_msg': r'ТРПО. Лабораторная работа №3',
    }
    expected = {
        'Number': '3',
        'URL': ['http://github.com'],
        'errorDescription': ['Отсутствует подпись'],
    }

    subject = letter['head_of_msg']
    body = letter['body_of_msg']
    actual = validation(subject, body, '"Иван Иванович"')

    self.assertEqual(actual, expected)
def test_all_positive(self):
    """A message with a proper subject and signature reports no errors."""
    from Validation import validation

    letter = {
        'email_id': r'"Иван Иванович" <*****@*****.**>',
        'body_of_msg': r'Добрый день, вот лабораторная http://github.com -- С уважением, Иван Иванович 18-ИСбо-2а',
        'head_of_msg': r'ТРПО. Лабораторная работа №3',
    }
    expected = {
        'Number': '3',
        'URL': ['http://github.com'],
        'errorDescription': [],
    }

    subject = letter['head_of_msg']
    body = letter['body_of_msg']
    # NOTE: unlike the sibling tests, the sender name is passed without quotes.
    actual = validation(subject, body, 'Иван Иванович')

    self.assertEqual(actual, expected)
def test_title(self):
    """A malformed subject reports the missing lab number and wrong discipline."""
    from Validation import validation

    letter = {
        'email_id': r'"Иван Иванович" <*****@*****.**>',
        'body_of_msg': r'Добрый день, вот лабораторная http://github.com -- С уважением, Иван Иванович 18-ИСбо-2а',
        'head_of_msg': r'Лаба',
    }
    expected = {
        'Number': '',
        'URL': ['http://github.com'],
        'errorDescription': [
            'Нет номера лабораторной работы',
            'Неверно указана дисциплина',
        ],
    }

    subject = letter['head_of_msg']
    body = letter['body_of_msg']
    actual = validation(subject, body, '"Иван Иванович"')

    self.assertEqual(actual, expected)
def printAccuracy(self, testPath, filehandle):
    """Write the post-pruning accuracy report to ``filehandle``.

    A ``validation`` handler is built from ``testPath`` and ``self.treeroot``.
    The report lists ``self.nodeNum``, ``self.levelcontrol``, ``self.maxAttr``
    and the handler's ``calculateAccuracy()`` result formatted as a
    percentage.

    Args:
        testPath: Path handed to ``validation`` for scoring the tree.
        filehandle: Writable text stream that receives the report.

    stdout is redirected only while the report is written. The previous
    stream is restored afterwards even when an exception is raised, instead
    of being force-reset to ``sys.__stdout__``, so a redirection installed by
    the caller survives this call.
    """
    from contextlib import redirect_stdout

    tmp = validation(testPath, self.treeroot)
    # Anything printed while the accuracy is computed still goes to the file.
    with redirect_stdout(filehandle):
        print(
            '\n|-----------------------------------------------------------------------------------------------|'
        )
        print('\n |--number of nodes tried to prune: ', self.nodeNum,
              '--|--starting node number: ', self.levelcontrol, ' --|')
        print('\n Node selected to prune: ', self.maxAttr)
        print(
            ' The prediction accuracy on given data set after pruning the tree is ',
            format(tmp.calculateAccuracy(), '5.2%'))
        print(
            '|-----------------------------------------------------------------------------------------------|\n'
        )
def test_all_negative(self):
    """A message failing every check reports all four errors, in order."""
    from Validation import validation

    letter = {
        'email_id': r'"Иван Иванович" <*****@*****.**>',
        'body_of_msg': r'Вот лабораторная http://github.com',
        'head_of_msg': r'Лаба',
    }
    expected = {
        'Number': '',
        'URL': ['http://github.com'],
        'errorDescription': [
            'Отсутствует подпись',
            'Нет приветствия',
            'Нет номера лабораторной работы',
            'Неверно указана дисциплина',
        ],
    }

    subject = letter['head_of_msg']
    body = letter['body_of_msg']
    actual = validation(subject, body, '"Иван Иванович"')

    self.assertEqual(actual, expected)
if not email: send_message(service, USER_ID, email_name, email_name_surname, 3, None, None, message_info) logger.warning(r"main: Email don't exist in table_valid") print(r"main: Email don't exist in table_valid") else: # Получение группы пользователя result = search_group(email) group_user = result[0] group_name_surname = result[1] # Выставление его в журнал, если отсутствует result_add_table = add_table(group_user, group_name_surname)[0] if result_add_table == 'available' or result_add_table == 'accepted': # Проверка валидации письма valid_dict = validation(message_info['head_of_msg'], message_info['body_of_msg']) if len(valid_dict["errorDescription"]) > 0: send_message(service, USER_ID, email_name, email_name_surname, 2, valid_dict, valid_dict, message_info) logger.warning( r"main: Message failed validation. Email_id :%s" % email_id) print( f"main: Message failed validation. Email_id :{email_id}" ) else: # Получение результата из модуля проверки answer = 1 # check_lab(valid_dict['URL'], valid_dict['Number'])['grade'] logger.info( r"main: Receiving a response from the verification module. Mark in table :%s"
def main(): # check if the command form is correct if len(sys.argv)<6: print(" Correct Input Form:\n --python3--|--main.py--|--number of nodes to prune--|--training set path--|--validation set path --|--test set path--|--print or not--|") sys.exit("Error: Missing Arguments!\n exited").gettrace() # set home directory as the PATH PATH = './Data/' # read arguments from the command pruneNum = int(sys.argv[1]) trainingPath = PATH+sys.argv[2] validationPath = PATH+sys.argv[3] testPath = PATH+sys.argv[4] printTreeVal = int(sys.argv[5]) # set Information entropy threshold to be 0, under which we regard it as a pure node threshold = 0 # create a directory storing output data files os.makedirs('./Data/output',exist_ok=True) ######################## IG-based Tree ################################ #################################################################################### # build a decision tree and train it decisiontree = decisionTree(trainingPath,threshold) # file store the tree structure before pruning filehandle = open('./Data/output/original_tree.txt','w') # print tree decisiontree.printTree(filehandle) # close file filehandle.close() ##################################################################################### # file storing the accuracy information of the tree before pruning filehandle = open('./Data/output/accuracy.txt','a') filehandle.seek(0) filehandle.truncate() ##################################################################################### # check accuracy with validation data set before pruning treeValid = validation(testPath,decisiontree.getTree()) # print the accuracy on test data set treeValid.printAccuracy(filehandle) # print the average depth and total number of nodes decisiontree.printLevNod(filehandle) # prune the tree if pruneNum != 0: prunetree = prune(decisiontree,validationPath,pruneNum,0) prunetree.pruneTree() prunetree.printAccuracy(testPath,filehandle) # print the average depth and total number of nodes 
decisiontree.printLevNod(filehandle) filehandle.close() if printTreeVal == 1: ############################################################################## # create a file to store the IG-built tree after post-prune file = open('./Data/output/postprune_tree.txt','w') ############################################################################## prunetree.printTree(file) ############################################################################## file.close() ###################### Random-built Tree ######################### #################################################################################### # create a file storing the tree structure filehandle = open('./Data/output/rand_original_tree.txt','w') # build a decision tree and train it by randomly selecting attributes decisiontree = randDecisionTree(trainingPath,threshold) # print tree decisiontree.printTree(filehandle) # close file filehandle.close() ##################################################################################### # file storing the accuracy information of the tree before pruning filehandle = open('./Data/output/rand_accuracy.txt','a') filehandle.seek(0) filehandle.truncate() ##################################################################################### # check accuracy with validation data set before pruning treeValid = validation(testPath,decisiontree.getTree()) # print the accuracy on test data set treeValid.printAccuracy(filehandle) # print the average depth and total number of nodes decisiontree.printLevNod(filehandle) # prune the tree if pruneNum != 0: prunetree = prune(decisiontree,validationPath,pruneNum,0) prunetree.pruneTree() prunetree.printAccuracy(testPath,filehandle) # print the average depth and total number of nodes decisiontree.printLevNod(filehandle) filehandle.close() if printTreeVal == 1: ############################################################################## # create a file to store the tree after post-prune file = 
open('./Data/output/rand_postprune_tree.txt','w') ############################################################################## prunetree.printTree(file) ############################################################################## file.close()
def main(): # check if the command form is correct if len(sys.argv) < 6: print( " Correct Input Form:\n --python3--|--main.py--|--number of nodes to prune--|--training set path--|--validation set path --|--test set path--|--print or not--|" ) sys.exit("Error: Missing Arguments!\n exited").gettrace() # set home directory as the PATH PATH = './Data/' # read arguments from the command pruneNum = int(sys.argv[1]) trainingPath = PATH + sys.argv[2] validationPath = PATH + sys.argv[3] testPath = PATH + sys.argv[4] printTreeVal = int(sys.argv[5]) # set Information entropy threshold to be 0, under which we regard it as a pure node threshold = 0 # create a directory storing output data files os.makedirs('./Data/output', exist_ok=True) ######################## IG-based Tree ################################ #################################################################################### # build a decision tree and train it decisiontree = decisionTree(trainingPath, threshold) # file store the tree structure before pruning filehandle = open('./Data/output/original_tree.txt', 'w') # print tree decisiontree.printTree(filehandle) # close file filehandle.close() ##################################################################################### # file storing the accuracy information of the tree before pruning filehandle = open('./Data/output/accuracy.txt', 'a') filehandle.seek(0) filehandle.truncate() ##################################################################################### # check accuracy with validation data set before pruning treeValid = validation(testPath, decisiontree.getTree()) # print the accuracy on test data set treeValid.printAccuracy(filehandle) # print the average depth and total number of nodes decisiontree.printLevNod(filehandle) # prune the tree if pruneNum != 0: prunetree = prune(decisiontree, validationPath, pruneNum, 0) prunetree.pruneTree() prunetree.printAccuracy(testPath, filehandle) # print the average depth and total number of nodes 
decisiontree.printLevNod(filehandle) filehandle.close() if printTreeVal == 1: ############################################################################## # create a file to store the IG-built tree after post-prune file = open('./Data/output/postprune_tree.txt', 'w') ############################################################################## prunetree.printTree(file) ############################################################################## file.close() ###################### Random-built Tree ######################### #################################################################################### # create a file storing the tree structure filehandle = open('./Data/output/rand_original_tree.txt', 'w') # build a decision tree and train it by randomly selecting attributes decisiontree = randDecisionTree(trainingPath, threshold) # print tree decisiontree.printTree(filehandle) # close file filehandle.close() ##################################################################################### # file storing the accuracy information of the tree before pruning filehandle = open('./Data/output/rand_accuracy.txt', 'a') filehandle.seek(0) filehandle.truncate() ##################################################################################### # check accuracy with validation data set before pruning treeValid = validation(testPath, decisiontree.getTree()) # print the accuracy on test data set treeValid.printAccuracy(filehandle) # print the average depth and total number of nodes decisiontree.printLevNod(filehandle) # prune the tree if pruneNum != 0: prunetree = prune(decisiontree, validationPath, pruneNum, 0) prunetree.pruneTree() prunetree.printAccuracy(testPath, filehandle) # print the average depth and total number of nodes decisiontree.printLevNod(filehandle) filehandle.close() if printTreeVal == 1: ############################################################################## # create a file to store the tree after post-prune file = 
open('./Data/output/rand_postprune_tree.txt', 'w') ############################################################################## prunetree.printTree(file) ############################################################################## file.close()