def decision_tree_learning(data, attributes, parent_examples, i):
    """Recursively build a decision tree for ``data``.

    Args:
        data: Examples to classify at this node.
        attributes: Random variables that may still be used for a split.
        parent_examples: Examples seen by the calling level; used for the
            fallback classification when ``data`` is empty.
        i: Depth stored on the node created by this call.

    Returns:
        For the base cases, whatever ``plurality_value`` returns or the
        classification reported by ``same_classification_check``; otherwise
        a ``Tree`` whose root holds the selected random variable.
    """
    if len(data) == 0:
        return plurality_value(parent_examples)

    check, classification = same_classification_check(data)
    if check:
        return classification

    if len(list(attributes)) == 0:
        return plurality_value(data)

    # a = the random variable with the highest gain out of the data
    a = importance(data, attributes)[0][0]
    node = Node(a)
    node.set_depth(i)  # depth of each node (used to print the tree)
    i += 1
    tree = Tree(node)

    index = index_of_var(a, data)
    attributes1 = get_attributes(index, data)
    if attributes1 is None:
        # Nothing left to branch on: fall back to the plurality value.
        return plurality_value(data)

    # The attribute set handed to every child is the same (everything except
    # `a`), so build it once instead of once per branch.
    remaining = list(attributes)
    try:
        remaining.remove(a)
    except ValueError:
        # `a` was not among the candidates; nothing to drop.
        pass
    variables = tuple(remaining)

    for attribute in attributes1:
        exs = get_examples_attribute(index, attribute, data)
        subtree = decision_tree_learning(exs, variables, data, i)
        # NOTE(review): the base cases above return the helpers' results
        # directly, yet `.get_root()` is called on every subtree — confirm
        # those helpers return Tree-like objects.
        tree.get_root().add_child(subtree.get_root(), attribute)  # adds branch to the tree
    return tree
def test_search_node(self):
    """search_node finds the root and each child added under it."""
    tournament_tree = Tree(self.g1, 2)
    for child in (self.g2, self.g3):
        tournament_tree.get_root().add_child(child)

    # Same lookup order as before: both children first, then the root.
    for expected in (self.g2, self.g3, self.g1):
        found = tournament_tree.search_node(expected)
        self.assertEqual(found.get_value(), expected)
def main():
    """Decompress the Huffman-encoded file(s) given on the command line.

    Every file named in ``sys.argv[1:]`` is read as binary and converted to
    one string of '0'/'1' characters.  ``decode`` rebuilds the tree from the
    header, then ``decode_char`` is called repeatedly until it yields the
    "EOF" marker.  The decoded text, without that marker, is handed to
    ``salvar_arquivo_descompactado``.
    """
    # Read the binary input: each byte becomes its zero-padded 8-bit string.
    bits = []
    for arg in sys.argv[1:]:
        try:
            with open(arg, 'rb') as f:
                bits.extend(format(byte, '08b') for byte in f.read())
        except OSError:
            # Report the file that actually failed; the old handler printed
            # an undefined name and died with NameError instead.
            print("Não existe arquivo chamado:", arg)
            return
    cadeia_binaria = "".join(bits)

    if not cadeia_binaria:
        # No arguments or only empty files: there is no header to decode.
        print("Nenhum dado para descompactar")
        return

    # Decode the header and get the index where the text starts.
    root_dec, pos = decode(cadeia_binaria)

    # Decode the text based on the header.
    tree_dec = Tree(root_dec)
    char_atual = ""
    texto_completo = ""
    # Position is wrapped in a list, presumably so decode_char can advance it
    # in place — confirm against decode_char.
    pos = [pos - 1]
    while char_atual != "EOF":
        root_dec = copy.copy(tree_dec.get_root())
        char_atual = decode_char(root_dec, pos, cadeia_binaria)
        texto_completo += char_atual

    # Strip the trailing "EOF" (End Of File) marker from the text.
    texto_completo = texto_completo[:-3]

    # Save the complete text to a file.
    salvar_arquivo_descompactado(texto_completo)
    print("Arquivo descompactado com sucesso(descompact.txt)!!!!")
def decode(string_cod):
    """Rebuild the tree described by the header bits of ``string_cod``.

    Scanning starts at index 1.  A '0' produces an internal node; a '1' is
    followed by seven bits that give the leaf symbol, where seven ones stand
    for the "EOF" marker.  Each new node is attached to the node on top of
    the stack, left slot first, then right.

    Returns:
        ``(root, index)`` where ``index`` is the position reached when the
        scan stopped, or ``None`` when ``string_cod`` is empty.
    """
    if string_cod == "":
        return None

    current = Node(None, None, 'raiz', 0)
    open_nodes = [current]
    tree = Tree(current)
    index = 1
    length = len(string_cod)

    while open_nodes and index < length:
        bit = string_cod[index]
        if bit == '0':
            current = Node(None, None, 'n-folha', 0)
        elif bit == '1':
            symbol_bits = string_cod[index + 1:index + 8]
            if symbol_bits == '1111111':
                symbol = 'EOF'
            else:
                code = int(symbol_bits, 2)
                symbol = code.to_bytes((code.bit_length() + 7) // 8, 'big').decode()
            current = Node(None, None, 'folha', symbol)
            index += 7  # skip the seven symbol bits just consumed

        parent = open_nodes[-1]
        if parent.get_e() == None:
            parent.set_e(current)
        else:
            # Second child filled: this parent is complete.
            parent.set_d(current)
            open_nodes.pop()

        # Nodes carrying an int still need children, so keep them open.
        if type(current.get_char()) == int:
            open_nodes.append(current)
        index += 1

    return (tree.get_root(), index)
from node import Node
from queue import Queue
from tree import Tree

# Sample tree used to try out the traversal below.
tree = Tree("apple")
tree.get_root().set_left_child(Node("banana"))
tree.get_root().set_right_child(Node("cherry"))
tree.get_root().get_left_child().set_left_child(Node("dates"))
# tree.get_root().get_left_child().set_right_child(Node("elderberry"))
# tree.get_root().get_right_child().set_left_child(Node("fig"))
# tree.get_root().get_right_child().set_right_child(Node("grape"))


def bfs(tree):
    """Print node values in breadth-first order, starting at the root.

    Left children are queued before right children.
    """
    pending = Queue()
    pending.enq(tree.get_root())
    visit_order = []

    # Loop continues for as long as the queue object is truthy.
    while pending:
        current = pending.deq()
        visit_order.append(current.value)
        children = (
            (current.has_left_child, current.get_left_child),
            (current.has_right_child, current.get_right_child),
        )
        for has_child, get_child in children:
            if has_child():
                pending.enq(get_child())

    print(visit_order)
def build_aggregate_tree(options,args,fields,dim,types,dict_dim, otfa = lambda x,y: x.get_root(),randomize=False,no_final_aggregation=False,from_gui=True): tree_src = None tree_dst = None list_res = [] list_window = [] tree = None f = open(options.input) lines = [ l for l in f] f.close() total_count = 0.0 current_window = 0 lineno = 0 for line in lines: print lineno lineno+=1 find = re.search(options.reg_exp,line) if find: dict_fields = {} for i in range(len(fields)): dict_fields[fields[i]] = find.group(i+1) try: sec = time.mktime(time.strptime(dict_fields["timestamp"],"%Y-%m-%d %H:%M:%S")) except: sec = int(dict_fields["timestamp"]) if sec >= current_window + options.window: if tree != None: list_res.append((tree,total_count)) tree = Tree(dim) current_window = current_window + options.window while sec>= current_window + options.window: list_res.append((None,0.0)) list_window.append(current_window) current_window = current_window + options.window else: tree = Tree(dim) current_window = sec total_count = 0.0 list_window.append(current_window) update_tree(tree,dict_fields,dict_dim,options.type_aggr) total_count+= float(dict_fields[VALUE]) #SKIP HERE if 1 == 0: try: update_tree(tree,dict_fields,dict_dim,options.type_aggr) #print tree,dict_fields,dict_dim,options.type_aggr except Exception, e: print e raise tree.set_root(tree.get_root().post_aggregate()) update_tree(tree,dict_fields,dict_dim,options.type_aggr) total_nodes = len(tree.get_root().preorder()) if total_nodes > options.max_nodes: tree.set_root(otfa(tree,total_count)) tree.set_root(tree.get_root().post_aggregate()) #aggregate_LRU(tree,options.max_nodes,options.aggregate,total_count) #print tree.get_root().preorder() #tree.increase_age_tree() total_count+= float(dict_fields[VALUE]) map(lambda x: x.increase_age(),tree.get_root().preorder())
def benchmark_aggregation(options,args,fields,dim,types,dict_dim, otfa = lambda x: x.get_root()): tree_src = None tree_dst = None list_res = [] list_window = [] tree = None f = open(options.input) total_count = 0.0 current_window = 0 tree = Tree(dim) k = 0 for line in f: find = re.search(options.reg_exp,line) if find: dict_fields = {} for i in range(len(fields)): dict_fields[fields[i]] = find.group(i+1) #print dict_fields try: sec = time.mktime(time.strptime(dict_fields["timestamp"],"%Y-%m-%d %H:%M:%S")) except: sec = int(dict_fields["timestamp"]) list_window.append(current_window) try: update_tree(tree,dict_fields,dict_dim,options.type_aggr) except: tree.set_root(tree.get_root().post_aggregate()) update_tree(tree,dict_fields,dict_dim,options.type_aggr) total_nodes = len(tree.get_root().preorder()) if total_nodes > options.max_nodes: tree.set_root(otfa(tree,total_count)) tree.set_root(tree.get_root().post_aggregate()) #aggregate_LRU(tree,options.max_nodes,options.aggregate,total_count) #print tree.get_root().preorder() #tree.increase_age_tree() total_count+= float(dict_fields[VALUE]) map(lambda x: x.increase_age(),tree.get_root().preorder()) k=k+1 if options.max_lines > 0 and options.max_lines < k : break if tree.get_root() != None: list_res.append((tree,total_count)) pretotal_nodes_before_aggregation = len(tree.get_root().preorder()) tree.aggregate(options.aggregate,total_count) print "Total nodes before pre order aggregation %s"%pretotal_nodes_before_aggregation print "Total nodes after aggregation %s"%len(tree.get_root().preorder())
def build_stability_aggregation_trees(options,args,fields,dim,types,dict_dim, otfa = lambda x,y: x.get_root(),randomize=False,no_final_aggregation=False): #TODO: remove nof_final aggregtion parameter and replace it by the test below if not options.aggregate>0: no_final_aggregation = True tree_src = None tree_dst = None list_res = [] list_window = [] tree = None f = open(options.input) lines = [ l for l in f] f.close() if randomize: random.shuffle(lines) total_count = 0.0 current_window = 0 for line in lines: #print line find = re.search(options.reg_exp,line) #print find if find: dict_fields = {} for i in range(len(fields)): dict_fields[fields[i]] = find.group(i+1) #print dict_fields try: # print dict_fields["timestamp"] sec = time.mktime(time.strptime(dict_fields["timestamp"],"%Y-%m-%d %H:%M:%S")) except: sec = int(dict_fields["timestamp"]) if sec >= current_window + options.window: if tree != None: list_res.append((tree,total_count)) tree = Tree(dim) current_window = current_window + options.window while sec>= current_window + options.window: list_res.append((None,0.0)) list_window.append(current_window) current_window = current_window + options.window else: tree = Tree(dim) current_window = sec total_count = 0.0 list_window.append(current_window) try: update_tree(tree,dict_fields,dict_dim,options.type_aggr) except Exception, e: print e raise tree.set_root(tree.get_root().post_aggregate()) update_tree(tree,dict_fields,dict_dim,options.type_aggr) total_nodes = len(tree.get_root().preorder()) if total_nodes > options.max_nodes: tree.set_root(otfa(tree,total_count)) tree.set_root(tree.get_root().post_aggregate()) #aggregate_LRU(tree,options.max_nodes,options.aggregate,total_count) #print tree.get_root().preorder() #tree.increase_age_tree() total_count+= float(dict_fields[VALUE]) map(lambda x: x.increase_age(),tree.get_root().preorder())
def main():
    """Huffman-compress the text file(s) given on the command line.

    The contents of every file in ``sys.argv[1:]`` are concatenated.  A
    frequency table (plus an "EOF" entry with count 1) seeds one node per
    symbol; the two nodes picked by ``menores_nodes`` are merged repeatedly
    until one root remains.  ``tabela_pre`` fills the code table, ``cab``
    produces the header, and header + encoded text is passed to
    ``salvar_arquivo``.
    """
    from collections import Counter

    conteudo = ""
    for arg in sys.argv[1:]:
        print(arg)
        try:
            with open(arg, 'r') as file:
                conteudo += file.read()
        except OSError:
            # Only real file-access failures are reported as a missing file;
            # errors in the algorithm below are no longer mislabelled.
            print("Não existe arquivo chamado")
            return

    # Symbol frequencies in a single pass (first-seen order is preserved).
    tabela_inicial = dict(Counter(conteudo))
    tabela_inicial["EOF"] = 1

    # Create a list of single-node trees.
    nodes = [
        Node(None, None, ocorrencias, chave)
        for chave, ocorrencias in tabela_inicial.items()
    ]

    # Huffman's algorithm: merge the two selected nodes until one is left.
    while len(nodes) > 1:
        menores = menores_nodes(nodes)
        soma = menores[0].get_apar() + menores[1].get_apar()
        node_pai = Node(menores[0], menores[1], soma, soma)
        nodes.remove(menores[0])
        nodes.remove(menores[1])
        nodes.append(node_pai)

    # Build the tree.
    tree = Tree(nodes[0])
    root = tree.get_root()

    # Build the encoding table.
    tabela_final = {}
    tabela_pre(root, [], tabela_final)
    print("Tabela de codificação: ", tabela_final)

    # Encode the text with the table, then append the end-of-file marker.
    cod = "".join(tabela_final[simbolo] for simbolo in conteudo)
    cod += tabela_final['EOF']

    # Build the file header; `cab` writes its result into the list.
    cab_e_texto = [""]
    cab(root, cab_e_texto, tabela_final)

    # Join the header and the already-binary text content.
    arquivo = cab_e_texto[0] + cod
    salvar_arquivo(arquivo)
    print("Arquivo compactado criado com sucesso (compact.txt)!!!!!")
def test_init_node_has_id_of_root(self):
    """A tree built from a node whose id is 'root' reports 'root' as root id."""
    root_id = Tree(Node(title='foo', id='root')).get_root().get_id()
    self.assertEqual(root_id, 'root')
def test_init_node_not_have_id_root(self):
    """Test init using a node whose id is not 'root'."""
    root_id = Tree(Node(title='foo', id=0)).get_root().get_id()
    self.assertEqual(root_id, 'root')
def test_get_root(self):
    """The root of a freshly built tree holds the value it was created with."""
    tree = Tree(self.g1, 2)
    root_value = tree.get_root().get_value()
    self.assertEqual(root_value, self.g1)
# Get the trees now for i in range(num_trees): # Get train data since train data varies for each tree num_examples_to_sample = np.round( 0.1 * train_data.shape[0]).astype(int) ind = np.random.randint(0, train_data.shape[0], num_examples_to_sample) label_data_tr = np.hstack((train_label[ind].reshape( (-1, 1)), train_data[ind])).astype('str') label_data_tr = np.vstack((header, label_data_tr)) train_data_obj = Data(data=label_data_tr) myTree = Tree() tree_util.ID3(train_data_obj, train_data_obj.attributes, myTree.get_root(), myTree, True, depth) # Prediction from each tree on all of train data and test data out_tr = tree_util.prediction_label( label_data_tr_full_obj, myTree) out_te = tree_util.prediction_label( label_data_te_obj, myTree) data_tr_svm[:, i] = out_tr data_te_svm[:, i] = out_te # Train on the best hyperparameter classifier = SVM(num_trees, C) # SVM part start training for e in range(epoch_cv):
data2 = np.loadtxt(DATA_DIR + 'test.csv', delimiter=',', dtype=str) data_obj2 = Data(data=data2) # Train data data = np.loadtxt(DATA_DIR + 'train.csv', delimiter=',', dtype=str) data_obj = Data(data=data) print("Filling Missing Entries\n...") # Fill the missing entries in the data majority = util.get_majority_column_data(data_obj) data_obj = util.fill_data(data_obj, majority, data) data_obj2 = util.fill_data(data_obj2, majority, data2) print("...\n...\n...\nFilled Missing Entries\n") myTree = Tree() util.ID3(data_obj, data_obj.attributes, myTree.get_root(), myTree, False, 2) # print("--------------- Printing Tree --------------------") myTree.print_tree(myTree.get_root(), 0) # Accuracy Prediction acc = util.prediction_accuracy(data_obj, myTree) acc = util.prediction_accuracy(data_obj2, myTree) # Depth Prediction print("\nDepth of Tree = " + str(myTree.get_depth(myTree.get_root()))) ################################################################################ # Q3 (2a) ################################################################################ DATA_DIR = 'data/CVfolds_new/'
class TestTreeMethods(unittest.TestCase):
    """Unit tests for Tree construction, search, add and string output."""

    def setUp(self):
        self.n1 = Node(title='node1', id='1', parent_id='root')
        self.n2 = Node(title='node2', id='2', parent_id='1')
        self.n3 = Node(title='node3', id='3', parent_id='1')
        self.n4 = Node(title='node4', id='4', parent_id='2')
        self.n5 = Node(title='node5', id='5', parent_id='4')

        # set up tree with multiple nodes
        self.t1 = Tree()
        self.t1.add(self.n1)  # node1 has many children
        self.t1.add(self.n2)
        self.t1.add(self.n3)
        self.t1.add(self.n4)
        self.t1.add(self.n5)

        # set up tree with only one node besides root
        self.n6 = Node('node6', '6', parent_id='root')
        self.one_node_tree = Tree()
        self.one_node_tree.add(self.n6)

    def tearDown(self):
        # Drop every fixture created in setUp.
        self.n1 = None
        self.n2 = None
        self.n3 = None
        self.n4 = None
        self.n5 = None
        self.n6 = None
        self.t1 = None
        self.one_node_tree = None

    def test_get_root(self):
        self.assertEqual(self.t1.get_root().get_id(), 'root')

    def test_init_node_not_have_id_root(self):
        """Test init using a node whose id is not 'root'."""
        n = Node(title='foo', id=0)
        t = Tree(n)
        self.assertEqual(t.get_root().get_id(), 'root')

    def test_init_node_has_id_of_root(self):
        n = Node(title='foo', id='root')
        t = Tree(n)
        self.assertEqual(t.get_root().get_id(), 'root')

    def test_string_empty_tree(self):
        t2 = Tree(None)
        self.assertEqual(t2.__str__(), '|---Google_Drive\n')

    def test_string_non_empty_tree(self):
        print("You can't really test this...automatically")
        print(self.t1)

    def test_search_for_root(self):
        result = self.t1.search('root')
        self.assertEqual(result.get_id(), 'root')

    def test_search_for_first_node_added(self):
        result = self.t1.search('1')
        self.assertEqual(result.get_id(), '1')

    def test_search_for_nonexisting_node_in_one_node_tree(self):
        result = self.one_node_tree.search(self.n2.get_id())
        self.assertIsNone(result)

    def test_new_tree_add_2_nodes_and_print_it(self):
        t = Tree()
        n = Node(title='test', id='1', parent_id='root')
        t.add(n)
        n = Node(title='test2', id='2', parent_id='1')
        t.add(n)
        print(t)

    def test_new_tree_add_2_nodes_and_search_it(self):
        t = Tree()
        n = Node(title='test', id='1', parent_id='root')
        t.add(n)
        n = Node(title='test2', id='2', parent_id='1')
        t.add(n)
        result = t.search('2')
        self.assertEqual(result.get_id(), '2')

    # From here down, tests are failing

    def test_search_for_nested_leaf_node(self):
        result = self.t1.search(self.n5.get_id())
        self.assertEqual('5', result.get_id())

    # The checks below use assertEqual: assertTrue(x, y) treats y as the
    # failure message and passes for any truthy x.

    def test_search_for_node1(self):
        result = self.t1.search(self.n1.get_id())
        self.assertEqual(result.get_id(), '1')

    def test_search_for_node2(self):
        result = self.t1.search(self.n2.get_id())
        self.assertEqual(result.get_id(), '2')

    def test_search_for_node3(self):
        result = self.t1.search(self.n3.get_id())
        self.assertEqual(result.get_id(), '3')

    def test_search_for_node4(self):
        result = self.t1.search(self.n4.get_id())
        self.assertEqual(result.get_id(), '4')

    def test_search_for_node5(self):
        result = self.t1.search(self.n5.get_id())
        self.assertEqual(result.get_id(), '5')

    def test_search_empty_tree(self):
        root = None
        empty_tree = Tree(root)
        result = empty_tree.search(self.n1.get_id())
        self.assertIsNone(result)

    def test_check_that_node_was_added(self):
        n = Node('test_node', id='7', parent_id='4')
        was_added = self.t1.add(n)
        self.assertEqual(was_added, 1)

    def test_add_node_whose_parent_is_in_tree(self):
        """Test adding a node whose parent is node4."""
        n = Node('test_node2', id='8', parent_id='4')
        was_added = self.t1.add(n)  # should be 1
        self.assertEqual(was_added, 1)

    def test_add_node_whose_parent_is_not_in_tree(self):
        n = Node('test_node3', id='9', parent_id='0')
        was_added = self.t1.add(n)  # should be -1
        self.assertEqual(was_added, -1)

    def test_add_node_whose_parent_is_none(self):
        n = Node('test_node', id='8')
        was_added = self.t1.add(n)  # should be 0
        self.assertEqual(was_added, 0)
from tree import Tree from tree import Node myTree = Tree() #print(myTree.get_root()) n = Node('taste',5) p = Node('var',6) q = Node('var',7) r = Node('var',8) s = Node('name',9) myTree.add_node(n,myTree.get_root()) print("Traversing the tree after adding 1 node") myTree.print_tree(myTree.get_root(),0) myTree.add_node(p,myTree.search_node(myTree.get_root(),n.feature,n.value)) print("Traversing the tree after adding 2 nodes") myTree.print_tree(myTree.get_root(),0) myTree.add_node(q,myTree.search_node(myTree.get_root(),n.feature,n.value)) myTree.add_node(r,myTree.search_node(myTree.get_root(),q.feature,q.value)) print("Traversing the tree after adding 4 nodes") myTree.print_tree(myTree.get_root(),0) myTree.add_node(s,myTree.search_node(myTree.get_root(),r.feature,r.value)) """ n.add_child(p) n.add_child(q) n.add_child(r) r.add_child(s)
n = Node('taste') n.add_value('o') p = Node('var') n.add_value('a') q = Node('var') n.add_value('b') r = Node('var') r.add_value('c') s = Node('name') myTree.add_node(n, myTree.get_root()) print("Traversing the tree after adding 1 node") myTree.print_tree(myTree.get_root(), 0) myTree.add_node(p, n) #myTree.add_node(p,myTree.search_node(myTree.get_root(),n.feature,n.value)) print("Traversing the tree after adding 2 nodes") myTree.print_tree(myTree.get_root(), 0) myTree.add_node(q, n) myTree.add_node(r, n) print("Traversing the tree after adding 4 nodes") myTree.print_tree(myTree.get_root(), 0) myTree.add_node(s, r) """ n.add_child(p)