def set_up_dynamic_pooling_layer():
    # os.path.join instead of the original 'test\pruebas.py' so the path also works outside Windows
    tree = file_parser(os.path.join('test', 'pruebas.py'))
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    ls_nodes = node_position_assign(ls_nodes)
    ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node, 20, 0.1, 0.001)
    ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
    w_comb1 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    w_comb2 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    coding_layer = Coding_layer(20, w_comb1, w_comb2)
    ls_nodes = coding_layer.coding_layer(ls_nodes, dict_ast_to_Node, w_l, w_r, b_code)
    w_t = torch.randn(4, 20, requires_grad=True)
    w_r = torch.randn(4, 20, requires_grad=True)
    w_l = torch.randn(4, 20, requires_grad=True)
    b_conv = torch.randn(4, requires_grad=True)
    convolutional_layer = Convolutional_layer(20, w_t, w_r, w_l, b_conv, features_size=4)
    ls_nodes = convolutional_layer.convolutional_layer(ls_nodes, dict_ast_to_Node)
    max_pooling_layer = Max_pooling_layer()
    max_pooling_layer.max_pooling(ls_nodes)
    dynamic_pooling = Dynamic_pooling_layer()
    hidden_input = dynamic_pooling.three_way_pooling(ls_nodes, dict_sibling)
    return ls_nodes, hidden_input
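# A minimal pytest-style check built on the fixture above. This is a sketch,
# not part of the original suite: it assumes three-way pooling yields one value
# per pooled region, i.e. a three-element vector, which matches the
# w_hidden = torch.randn(3, ...) used by the hidden layer elsewhere in this code.
def test_dynamic_pooling_layer_output():
    ls_nodes, hidden_input = set_up_dynamic_pooling_layer()
    assert len(ls_nodes) > 0
    assert len(hidden_input) == 3  # assumed: top / left / right pooled regions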
def set_up_one_max_pooling_layer():
    path = os.path.join('test', 'generators')
    data = os.path.join(path, 'prueba.py')
    tree = file_parser(data)
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    ls_nodes = node_position_assign(ls_nodes)
    ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
    # the original called leaves_nodes_assign twice; once is enough
    ls_nodes = leaves_nodes_assign(ls_nodes, dict_ast_to_Node)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node, 20, 0.1, 0.001, 0, 5)
    ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
    w_comb1 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    w_comb2 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    coding_layer = Coding_layer(20, w_comb1, w_comb2)
    ls_nodes = coding_layer.coding_layer(ls_nodes, dict_ast_to_Node, w_l, w_r, b_code)
    w_t = torch.randn(4, 20, requires_grad=True)
    w_r = torch.randn(4, 20, requires_grad=True)
    w_l = torch.randn(4, 20, requires_grad=True)
    b_conv = torch.randn(4, requires_grad=True)
    convolutional_layer = Convolutional_layer(20, w_t, w_r, w_l, b_conv, features_size=4)
    ls_nodes = convolutional_layer.convolutional_layer(ls_nodes, dict_ast_to_Node)
    pooling_layer = Pooling_layer()
    pooled_tensor = pooling_layer.pooling_layer(ls_nodes)
    return pooled_tensor
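# Companion sketch for the one-way pooling fixture. Assumption (not confirmed
# by the original source): one-way pooling max-pools each convolution feature
# channel over all nodes, so the pooled vector has features_size=4 entries.
def test_one_way_pooling_output():
    pooled_tensor = set_up_one_max_pooling_layer()
    assert pooled_tensor.shape[-1] == 4  # assumed: one entry per feature channel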
def set_up_hidden_layer():
    path = os.path.join('test', 'generators')
    data = os.path.join(path, 'prueba.py')
    tree = file_parser(data)
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    ls_nodes = node_position_assign(ls_nodes)
    ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
    ls_nodes = leaves_nodes_assign(ls_nodes, dict_ast_to_Node)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node, 20, 0.1, 0.001, 0, 5)
    ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
    w_comb1 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    w_comb2 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    coding_layer = Coding_layer(20, w_comb1, w_comb2)
    ls_nodes = coding_layer.coding_layer(ls_nodes, dict_ast_to_Node, w_l, w_r, b_code)
    w_t = torch.randn(4, 20, requires_grad=True)
    w_r = torch.randn(4, 20, requires_grad=True)
    w_l = torch.randn(4, 20, requires_grad=True)
    b_conv = torch.randn(4, requires_grad=True)
    convolutional_layer = Convolutional_layer(20, w_t, w_r, w_l, b_conv, features_size=4)
    ls_nodes = convolutional_layer.convolutional_layer(ls_nodes, dict_ast_to_Node)
    max_pooling_layer = Max_pooling_layer()
    max_pooling_layer.max_pooling(ls_nodes)
    dynamic_pooling = Dynamic_pooling_layer()
    hidden_input = dynamic_pooling.three_way_pooling(ls_nodes, dict_sibling)
    w_hidden = torch.randn(3, requires_grad=True)
    b_hidden = torch.randn(1, requires_grad=True)
    hidden = Hidden_layer(w_hidden, b_hidden)
    output_hidden = hidden.hidden_layer(hidden_input)
    return output_hidden, w_hidden, b_hidden
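# For orientation, a standalone sketch of what the hidden layer is taken to
# compute here: an affine map w . x + b from the three pooled values to one
# scalar logit, later squashed by a sigmoid. This is an assumption about
# Hidden_layer's behavior, not its actual source.
import torch

def hidden_layer_sketch(w_hidden, b_hidden, hidden_input):
    # hypothetical reimplementation: dot product plus bias -> one logit
    return torch.dot(w_hidden, hidden_input) + b_hidden

w = torch.randn(3, requires_grad=True)
b = torch.randn(1, requires_grad=True)
x = torch.randn(3)  # stand-in for the three-way pooled vector
print(hidden_layer_sketch(w, b, x))  # one-element tensor (the logit)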
class SecondNeuralNetwork():

    def __init__(self, n=20, m=4, pooling='one-way pooling'):
        self.vector_size = n
        self.feature_size = m
        # parameters
        self.w_comb1 = torch.diag(torch.randn(self.vector_size, dtype=torch.float32)).requires_grad_()
        self.w_comb2 = torch.diag(torch.randn(self.vector_size, dtype=torch.float32)).requires_grad_()
        self.w_t = torch.randn(self.feature_size, self.vector_size, requires_grad=True)
        self.w_r = torch.randn(self.feature_size, self.vector_size, requires_grad=True)
        self.w_l = torch.randn(self.feature_size, self.vector_size, requires_grad=True)
        self.b_conv = torch.randn(self.feature_size, requires_grad=True)
        # pooling method
        self.pooling = pooling
        if self.pooling == 'three-way pooling':
            # three pooled values feed the hidden layer
            self.w_hidden = torch.randn(3, requires_grad=True)
            self.b_hidden = torch.randn(1, requires_grad=True)
            self.dynamic = Dynamic_pooling_layer()
            self.max_pool = Max_pooling_layer()
        else:
            # one pooled value per feature channel feeds the hidden layer;
            # stored as self.pooling_layer so it no longer shadows the
            # self.pooling mode string checked in layers()
            self.w_hidden = torch.randn(self.feature_size, requires_grad=True)
            self.b_hidden = torch.randn(1, requires_grad=True)
            self.pooling_layer = Pooling_layer()
        # layers
        self.cod = Coding_layer(self.vector_size, self.w_comb1, self.w_comb2)
        self.conv = Convolutional_layer(self.vector_size, self.w_t, self.w_r, self.w_l,
                                        self.b_conv, features_size=self.feature_size)
        self.hidden = Hidden_layer(self.w_hidden, self.b_hidden)

    def train(self, targets, training_dict, total_epochs=10, learning_rate=0.1):
        """Create the training loop."""
        # Construct the optimizer
        params = [self.w_comb1, self.w_comb2, self.w_t, self.w_l, self.w_r,
                  self.b_conv, self.w_hidden, self.b_hidden]
        optimizer = torch.optim.SGD(params, lr=learning_rate)
        criterion = nn.BCELoss()
        print('The target values of the files are: ', targets)
        for epoch in range(total_epochs):
            start = time()
            outputs = self.forward(training_dict)
            try:
                loss = criterion(outputs, targets)
            except AttributeError:
                print(f'The size of outputs is {len(outputs)} and is of type {type(outputs)}')
                print('Check that the path is a folder and not a file')
                raise
            print('outputs: \n', outputs)
            # zero the parameter gradients, backpropagate, and update
            # (optimizer.step() performs w := w - lr * w.grad)
            optimizer.zero_grad()
            loss.backward(retain_graph=True)
            optimizer.step()
            end = time()
            print('Epoch: ', epoch, ', Time: ', end - start, ', Loss: ', loss)
        message = f'''
The loss we have for the training network is: {loss}
'''
        writer(message)
        self.save()

    def forward(self, training_dict):
        # accumulate the per-file predictions in a list and concatenate once;
        # the original compared a tensor against [] on later iterations
        outputs = []
        sigmoid = nn.Sigmoid()
        for filepath in training_dict:
            # forward pass (layer calculations) for one file
            output = self.layers(training_dict[filepath])
            outputs.append(sigmoid(output))
        return torch.cat(outputs, 0)

    def layers(self, vector_representation_params):
        ls_nodes = vector_representation_params[0]
        dict_ast_to_Node = vector_representation_params[1]
        dict_sibling = vector_representation_params[2]
        w_l_code = vector_representation_params[3]
        w_r_code = vector_representation_params[4]
        b_code = vector_representation_params[5]
        ls_nodes = self.cod.coding_layer(ls_nodes, dict_ast_to_Node, w_l_code, w_r_code, b_code)
        ls_nodes = self.conv.convolutional_layer(ls_nodes, dict_ast_to_Node)
        if self.pooling == 'three-way pooling':
            self.max_pool.max_pooling(ls_nodes)
            vector = self.dynamic.three_way_pooling(ls_nodes, dict_sibling)
        else:
            vector = self.pooling_layer.pooling_layer(ls_nodes)
        output = self.hidden.hidden_layer(vector)
        return output

    def save(self):
        '''Save all the trained parameters into csv files.'''
        # forward slashes keep the paths portable across operating systems
        # save w_comb1 into a csv file
        w_comb1 = self.w_comb1.detach().numpy()
        numpy.savetxt("params/w_comb1.csv", w_comb1, delimiter=",")
        # save w_comb2 into a csv file
        w_comb2 = self.w_comb2.detach().numpy()
        numpy.savetxt("params/w_comb2.csv", w_comb2, delimiter=",")
        # save w_t into a csv file
        w_t = self.w_t.detach().numpy()
        numpy.savetxt("params/w_t.csv", w_t, delimiter=",")
        # save w_l into a csv file
        w_l = self.w_l.detach().numpy()
        numpy.savetxt("params/w_l.csv", w_l, delimiter=",")
        # save w_r into a csv file
        w_r = self.w_r.detach().numpy()
        numpy.savetxt("params/w_r.csv", w_r, delimiter=",")
        # save b_conv into a csv file
        b_conv = self.b_conv.detach().numpy()
        numpy.savetxt("params/b_conv.csv", b_conv, delimiter=",")
        # save w_hidden into a csv file
        w_hidden = self.w_hidden.detach().numpy()
        numpy.savetxt("params/w_hidden.csv", w_hidden, delimiter=",")
        # save b_hidden into a csv file
        b_hidden = self.b_hidden.detach().numpy()
        numpy.savetxt("params/b_hidden.csv", b_hidden, delimiter=",")
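# A sketch of how SecondNeuralNetwork might be driven end to end. The
# build_training_dict helper, the file paths, and the target values below are
# hypothetical; the helper only mirrors the per-file pipeline the validation
# class uses (first_neural_network) to build the six-element lists that
# SecondNeuralNetwork.layers() unpacks.
import torch

def build_training_dict(filepaths, vector_size=20):
    training_dict = {}
    for filepath in filepaths:
        tree = file_parser(filepath)
        ls_nodes, dict_ast_to_Node = node_object_creator(tree)
        ls_nodes = node_position_assign(ls_nodes)
        ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
        ls_nodes = leaves_nodes_assign(ls_nodes, dict_ast_to_Node)
        embed = Embedding(vector_size, ls_nodes, dict_ast_to_Node)
        ls_nodes = embed.node_embedding()
        vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node,
                                                     vector_size, 0.1, 0.001, 0, 5)
        ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
        training_dict[filepath] = [ls_nodes, dict_ast_to_Node, dict_sibling,
                                   w_l, w_r, b_code]
    return training_dict

filepaths = ['file_a.py', 'file_b.py']   # hypothetical input files
targets = torch.tensor([1.0, 0.0])       # 1 = generator, 0 = no generator
network = SecondNeuralNetwork(n=20, m=4, pooling='one-way pooling')
network.train(targets, build_training_dict(filepaths))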
class Validation_neural_network():

    def __init__(self, n=30, m=100, pooling='one-way pooling'):
        self.vector_size = n
        self.feature_size = m
        # parameters, loaded from the csv files written by SecondNeuralNetwork.save()
        # (forward slashes keep the paths portable across operating systems)
        w_comb1 = numpy.genfromtxt("params/w_comb1.csv", delimiter=",")
        self.w_comb1 = torch.tensor(w_comb1, dtype=torch.float32)
        w_comb2 = numpy.genfromtxt("params/w_comb2.csv", delimiter=",")
        self.w_comb2 = torch.tensor(w_comb2, dtype=torch.float32)
        w_t = numpy.genfromtxt("params/w_t.csv", delimiter=",")
        self.w_t = torch.tensor(w_t, dtype=torch.float32)
        w_r = numpy.genfromtxt("params/w_r.csv", delimiter=",")
        self.w_r = torch.tensor(w_r, dtype=torch.float32)
        w_l = numpy.genfromtxt("params/w_l.csv", delimiter=",")
        self.w_l = torch.tensor(w_l, dtype=torch.float32)
        b_conv = numpy.genfromtxt("params/b_conv.csv", delimiter=",")
        self.b_conv = torch.tensor(b_conv, dtype=torch.float32)
        w_hidden = numpy.genfromtxt("params/w_hidden.csv", delimiter=",")
        self.w_hidden = torch.tensor(w_hidden, dtype=torch.float32)
        b_hidden = numpy.genfromtxt("params/b_hidden.csv", delimiter=",")
        self.b_hidden = torch.tensor(b_hidden, dtype=torch.float32)
        # pooling method
        self.pooling = pooling
        if self.pooling == 'one-way pooling':
            self.pooling_layer = Pooling_layer()
        else:
            self.dynamic = Dynamic_pooling_layer()
            self.max_pool = Max_pooling_layer()
        ### Layers
        self.cod = Coding_layer(self.vector_size, self.w_comb1, self.w_comb2)
        self.conv = Convolutional_layer(self.vector_size, self.w_t, self.w_r, self.w_l,
                                        self.b_conv, features_size=self.feature_size)
        self.hidden = Hidden_layer(self.w_hidden, self.b_hidden)

    def validation(self, targets, validation_dict, learning_rate=0.3, momentum=0,
                   l2_penalty=0, epoch_first=45):
        """Create the validation loop."""
        print('########################################')
        print('\n\n\nFinished training process. Entering validation process\n\n\n')
        print('The target values of the files are: ', targets)
        # Calculate the predictions
        predicts = self.prediction(validation_dict, learning_rate, momentum,
                                   l2_penalty, epoch_first)
        print('predictions: \n', predicts)
        # Loss function
        criterion = nn.BCELoss()
        loss = criterion(predicts, targets)
        # TODO Build the accuracy evaluation method for each file
        # Confusion matrix
        conf_matrix = self.conf_matrix(predicts, targets)
        print(conf_matrix)
        plot_confusion_matrix(conf_matrix, ['no generator', 'generator'])
        message = f'''
For the validation set we have the following results:
loss: {loss}
confusion matrix:
{conf_matrix}
'''
        writer(message)
        print('Loss validation: ', loss)
        accuracy = self.accuracy(predicts, targets)
        print('accuracy: ', accuracy)

    def prediction(self, validation_dict, learning_rate, momentum, l2_penalty, epoch_first):
        # accumulate predictions in a list and concatenate once; the original
        # compared a tensor against [] on later iterations
        outputs = []
        sigmoid = nn.Sigmoid()
        total = len(validation_dict)
        i = 1
        for filepath in validation_dict:
            # first neural network: per-file vector representation
            validation_dict[filepath] = self.first_neural_network(
                filepath, learning_rate, momentum, l2_penalty, epoch_first)
            print(f'finished vector representation of file: {filepath} ({i}/{total}) \n')
            i += 1
            # forward pass (second neural network)
            output = self.second_neural_network(validation_dict[filepath])
            outputs.append(torch.tensor([sigmoid(output)]))
        return torch.cat(outputs, 0)

    def first_neural_network(self, file, learning_rate, momentum, l2_penalty, epoch):
        '''Initialize the node list, the node dict and the sibling dict.'''
        # parse the data of the file into an AST
        tree = file_parser(file)
        # convert its nodes into our Node class and assign their attributes
        ls_nodes, dict_ast_to_Node = node_object_creator(tree)
        ls_nodes = node_position_assign(ls_nodes)
        ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
        ls_nodes = leaves_nodes_assign(ls_nodes, dict_ast_to_Node)
        # initialize the vector embeddings
        embed = Embedding(self.vector_size, ls_nodes, dict_ast_to_Node)
        ls_nodes = embed.node_embedding()
        # calculate the vector representation for each node
        vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node,
                                                     self.vector_size, learning_rate,
                                                     momentum, l2_penalty, epoch)
        ls_nodes, w_l_code, w_r_code, b_code = vector_representation.vector_representation()
        return [ls_nodes, dict_ast_to_Node, dict_sibling, w_l_code, w_r_code, b_code]

    def second_neural_network(self, vector_representation_params):
        ls_nodes = vector_representation_params[0]
        dict_ast_to_Node = vector_representation_params[1]
        dict_sibling = vector_representation_params[2]
        w_l_code = vector_representation_params[3]
        w_r_code = vector_representation_params[4]
        b_code = vector_representation_params[5]
        ls_nodes = self.cod.coding_layer(ls_nodes, dict_ast_to_Node, w_l_code, w_r_code, b_code)
        ls_nodes = self.conv.convolutional_layer(ls_nodes, dict_ast_to_Node)
        if self.pooling == 'one-way pooling':
            vector = self.pooling_layer.pooling_layer(ls_nodes)
        else:
            self.max_pool.max_pooling(ls_nodes)
            vector = self.dynamic.three_way_pooling(ls_nodes, dict_sibling)
        output = self.hidden.hidden_layer(vector)
        return output

    def accuracy(self, predicts, targets):
        with torch.no_grad():
            rounded_prediction = torch.round(predicts)
            # difference is 1 for a false negative and -1 for a false positive
            difference = targets - rounded_prediction
            errors = torch.abs(difference).sum()
            accuracy = (len(difference) - errors) / len(difference)
        return accuracy

    def conf_matrix(self, predicts, targets):
        with torch.no_grad():
            rounded_prediction = torch.round(predicts)
            # difference is 1 for a false negative and -1 for a false positive
            difference = targets - rounded_prediction
            # addition is 0 for a true negative and 2 for a true positive
            addition = targets + rounded_prediction
            conf_matrix = torch.zeros(2, 2, dtype=torch.int64)
            # rows are true labels, columns are predictions
            for i in range(len(addition)):
                if difference[i] == 1:
                    conf_matrix[1, 0] += 1
                elif difference[i] == -1:
                    conf_matrix[0, 1] += 1
                elif addition[i] == 0:
                    conf_matrix[0, 0] += 1
                else:
                    assert addition[i] == 2
                    conf_matrix[1, 1] += 1
        return conf_matrix.numpy()
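# To see the difference/addition encoding used by accuracy() and conf_matrix()
# concretely, here is a standalone toy check (hypothetical values, only torch
# required): two correct predictions, one false positive, one false negative.
import torch

targets = torch.tensor([1.0, 0.0, 0.0, 1.0])
predicts = torch.tensor([0.9, 0.1, 0.8, 0.2])
rounded = torch.round(predicts)          # -> [1., 0., 1., 0.]

difference = targets - rounded           # 1 = false negative, -1 = false positive
addition = targets + rounded             # 0 = true negative,  2 = true positive
print(difference)                        # tensor([ 0.,  0., -1.,  1.])
print(addition)                          # tensor([2., 0., 1., 1.])
# Accuracy as computed above: (4 - |difference|.sum()) / 4 = 0.5
print((len(difference) - difference.abs().sum()) / len(difference))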