def handle_predict(argv):
    """Classify the examples in ``argv[4]`` using the hypothesis stored in ``argv[3]``.

    The model file is read as two lines: the first line (the model name) is
    skipped because this variant always builds a ``DecisionTree``; the second
    line holds the hypothesis written as a Python literal.

    Args:
        argv: Argument list. ``argv[3]`` is the path of the model file and
            ``argv[4]`` is the path of the file with the examples to classify.

    Returns:
        The value returned by ``DecisionTree.classify`` for the processed
        examples and the loaded hypothesis.
    """
    with open(argv[3], "r") as f:
        f.readline()  # Skip the model-name line; it is not used here.
        hypothesis_text = f.readline()

    # NOTE: the original author flagged this load as insecure.  literal_eval
    # only evaluates Python literals (it does not run arbitrary code), but the
    # Python docs warn that hostile input can still exhaust memory or the
    # interpreter stack, so only load model files you trust.
    hypothesis = literal_eval(hypothesis_text)

    tree = DecisionTree()
    tree.define_positive_class(lambda x: x.classification == 'en')
    tree.define_classes(processing.classes)
    tree.define_attributes(processing.attr_definitions)

    examples = process_file(argv[4], training=False)
    examples = tree.create_examples(examples)
    return tree.classify(examples, hypothesis)
def handle_predict(argv):
    """Print one classification per example in ``argv[3]`` using the model in ``argv[2]``.

    The model file is read as two lines: the first names the model (``"dt"``
    selects ``DecisionTree``; any other value selects ``Adaboost``) and the
    second holds the hypothesis written as a Python literal.

    Args:
        argv: Argument list. ``argv[2]`` is the path of the model file and
            ``argv[3]`` is the path of the file with the examples to classify.

    Returns:
        None. Each classification is written to standard output.
    """
    with open(argv[2], "r") as f:
        model = f.readline().strip('\n')
        hypothesis_text = f.readline()

    # literal_eval only accepts Python literals, but per the Python docs very
    # large or deeply nested input can still exhaust memory or the stack, so
    # the model file should come from a trusted source.
    hypothesis = literal_eval(hypothesis_text)

    tree = DecisionTree() if model == "dt" else Adaboost()
    tree.define_positive_class(lambda x: x.classification == 'en')
    tree.define_classes(processing.classes)
    tree.define_attributes(processing.attr_definitions)

    examples = process_file(argv[3], training=False)
    examples = tree.create_examples(examples)
    for classification in tree.classify(examples, hypothesis):
        print(classification)
# Demo: train a random forest and a single decision tree on the first rows of
# an already-open CSV file (`csvfile`, supplied by the surrounding code) and
# compare them on the remaining rows.
reader = csv.reader(csvfile, delimiter=',')
records = [Record(row) for row in reader]

# The first 299 rows are used for training; every later row is held out for
# testing (same split as counting rows from 1 and testing `count < 300`).
trainingRecords = records[:299]
testingRecords = records[299:]

forest = RandomForest(trainingRecords, 200, 3, 3)
tree = DecisionTree(trainingRecords)

print("Printing Random Forest")
print(str(forest))
print("--------------------------------------------")

# Spot check on the first *training* record, so this is not a held-out result.
sample = trainingRecords[0]
print("Feature for a single classification : " + str(sample.feature))
print("True result for a single classification : " + str(sample.party))
print("Est. result from random forest : " + str(forest.classify(sample.feature)))
print("Est. result from decision tree : " + str(tree.classify(sample.feature)))
print("--------------------------------------------")

# Named `evaluator` rather than `eval` so the builtin eval() is not shadowed.
evaluator = Evaluator()
print("Est. average accuracy from random forest : " + str(evaluator.evaluation(forest, testingRecords)))
print("Est. average accuracy from decision tree : " + str(evaluator.evaluation(tree, testingRecords)))
def _percent_correct(confusion, total):
    """Return the share of correct results (TP + TN) out of ``total`` as a rounded integer percentage."""
    return int(round(100 * float(confusion['TP'] + confusion['TN']) / total))


def _format_example_row(labels, attributes):
    """Return the values of ``attributes`` in ``labels`` order, separated by tabs."""
    row = str(attributes[labels[0]])
    for previous_label, label in zip(labels, labels[1:]):
        # The gap before each value is sized from the preceding label: one tab
        # plus one more for every full 8 characters of that label.
        row += '\t' * ((len(previous_label) // 8) + 1) + str(attributes[label])
    return row


def main():
    """Run repeated ID3-versus-prior-probability trials and print the results.

    Reads ``input_file_name``, ``number_of_trials``, ``training_set_size`` and
    ``verbose`` from the command line. For every trial the input file is
    parsed again, partitioned into a training and a testing set, and both a
    ``DecisionTree`` and a ``PriorClassifier`` are built and tested. The tree
    and both accuracy percentages are printed per trial; with ``verbose`` the
    training and testing examples are listed as well. A summary with the mean
    accuracies is printed at the end.

    Raises:
        SystemExit: If ``number_of_trials`` is smaller than 1 (the summary
            needs at least one trial to compute the means).
    """
    parser = get_parser()
    args = parser.parse_args()
    input_file_name = args.input_file_name
    number_of_trials = args.number_of_trials
    training_set_size = args.training_set_size
    verbose = args.verbose

    # Without at least one trial the means below would divide by zero and
    # testing_set_size would never be bound.
    if number_of_trials < 1:
        raise SystemExit(
            "number_of_trials must be at least 1 (got %r)" % (number_of_trials,))

    decision_tree_performance = []
    prior_classifier_performance = []
    for trial in range(number_of_trials):
        labels, dataset = parse_file(input_file_name)
        training_set, testing_set = partition(dataset, training_set_size)
        testing_set_size = len(testing_set)
        t = DecisionTree(labels, training_set, testing_set)
        p = PriorClassifier(labels, training_set, testing_set)

        # Evaluate both classifiers on the testing set.
        tree_prob = _percent_correct(t.test(), len(t.testing_set))
        decision_tree_performance.append(tree_prob)
        prior_prob = _percent_correct(p.test(), len(p.testing_set))
        prior_classifier_performance.append(prior_prob)

        print(""" TRIAL NUMBER: %d -------------------- DECISION TREE STRUCTURE: """ % trial)
        print(t.tree)
        print(""" Percent of test cases correctly classified by a decision tree built \twith ID3 = %d%% Percent of test cases correctly classified by using prior \tprobabilities from the training set = %d%% """ % (tree_prob, prior_prob))

        if verbose:
            print("Examples in the training set\n--------------------")
            print("\t".join(t.labels) + '\n')
            for vector in t.training_set:
                print(_format_example_row(t.labels, vector.attributes))

            print("Examples in the testing set\n--------------------")
            header = t.labels + ["CLASS", "PRIOR RESULT", "ID3 RESULT"]
            print("\t".join(header) + '\n')
            for vector in t.testing_set:
                row = _format_example_row(t.labels, vector.attributes)
                last_tab = '\t' * ((len(t.labels[-1]) // 8) + 1)
                row += last_tab + str(vector.CLASS) + '\t'
                # NOTE(review): the PRIOR RESULT column is hard-coded to
                # False; presumably it should show the prior classifier's
                # prediction for this example -- confirm and wire it up.
                row += str(False) + '\t\t'
                row += str(t.classify(vector.attributes))
                print(row)

    tree_mean = sum(decision_tree_performance) / len(decision_tree_performance)
    prior_mean = sum(prior_classifier_performance) / len(
        prior_classifier_performance)

    # testing_set_size is the size observed in the last trial.
    print(""" Example file used = %s Number of trials = %d Training set size for each trial = %d Testing set size for each trial = %d Mean performance of decision tree over all trials = %d%% Mean performance of using prior probability derived from training set = %d%% correct classification """ % (input_file_name, number_of_trials, training_set_size, testing_set_size, tree_mean, prior_mean))
# Deep copy: keep an independent copy of `labels` for the classify call below,
# because the original list is handed to createTree first (presumably
# createTree modifies the list it receives -- confirm against its definition).
newLabel = copy.deepcopy(labels)
print(labels)
tree = DecisionTree.createTree(myData, labels)
print(myData)
print(newLabel)

classLabel = DecisionTree.classify(tree, newLabel, [1, 1])
print(classLabel)

# Store the tree on disk, read it back and show what was loaded.
DecisionTree.storeTree(tree, 'classifierStorage.txt')
treeTxt = DecisionTree.grabTree('classifierStorage.txt')
print(treeTxt)

# Build a second tree from the tab-separated lenses data set.  The file is
# opened in a `with` block so the handle is closed once the rows are read.
with open('../resource/lenses/lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLables = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = DecisionTree.createTree(lenses, lensesLables)
def main():
    """Build an ID3 decision tree from a CSV file and optionally classify test rows.

    Command-line options:
        -e/--examples: CSV file with the training examples. The first row is
            the header; its last column names the class attribute.
        -p/--print: print the resulting tree.
        -t/--testes: CSV file (with its own header row) whose rows are
            classified one by one; reading stops at the first empty row.

    When called without any command-line argument the help text is printed
    and the program exits with status 0. For every test row the tree's answer
    is printed; if the tree returns ``None`` the result of
    ``arvore.mostCommon()`` is printed instead.
    """
    parser = ArgumentParser(description='Porcessador de dados utilizando ID3.')
    parser.add_argument(
        '-e', '--examples', type=str,
        help='Documento com os dados que queremeos que a máquina aprenda.')
    parser.add_argument('-p', '--print', action='store_true',
                        help='Imprimir a árvore.')
    parser.add_argument(
        '-t', '--testes', type=str,
        help='Documentos onde se encontram os dados que se prentende avaliar.')
    args = parser.parse_args()

    if len(argv) == 1:
        parser.print_help()
        raise SystemExit(0)

    # The examples file is needed to build the tree; report its absence as a
    # usage error instead of failing later inside open(None).
    if args.examples is None:
        parser.error('o argumento -e/--examples é obrigatório')

    # Read the examples CSV.  newline='' is what the csv module expects so
    # that it can handle line endings inside quoted fields itself.
    with open(args.examples, 'rt', newline='') as fd:
        exemplosBuf = csv.reader(fd)
        firstRow = next(exemplosBuf)
        exemplos = list(exemplosBuf)  # remaining rows, each a list of strings

    # Map every header name to its column index.
    atributos = {nome: indice for indice, nome in enumerate(firstRow)}
    classe = firstRow[-1]

    arvore = DecisionTree(exemplos, atributos, classe)

    if args.print:
        print(arvore)

    if args.testes is not None:
        # Read the tests CSV and classify each row.
        with open(args.testes, 'rt', newline='') as fd:
            exemplosBuf = csv.reader(fd)
            firstRow = next(exemplosBuf)
            for aux in exemplosBuf:
                if not aux:
                    break  # an empty row ends the test data
                # Header name -> value for this row.
                dicio = {nome: aux[indice]
                         for indice, nome in enumerate(firstRow)}
                # Look up the answer; fall back to mostCommon() when the tree
                # has none.
                resul = arvore.classify(dicio)
                if resul is None:
                    print(arvore.mostCommon())
                else:
                    print(resul)