Example #1
def set_up_dynamic_pooling_layer():
    tree = file_parser(os.path.join('test', 'pruebas.py'))
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    ls_nodes = node_position_assign(ls_nodes)
    ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node,
                                                 20, 0.1, 0.001)
    ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
    w_comb1 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    w_comb2 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    coding_layer = Coding_layer(20, w_comb1, w_comb2)
    ls_nodes = coding_layer.coding_layer(ls_nodes, dict_ast_to_Node, w_l, w_r,
                                         b_code)
    w_t = torch.randn(4, 20, requires_grad=True)
    w_r = torch.randn(4, 20, requires_grad=True)
    w_l = torch.randn(4, 20, requires_grad=True)
    b_conv = torch.randn(4, requires_grad=True)
    convolutional_layer = Convolutional_layer(20,
                                              w_t,
                                              w_r,
                                              w_l,
                                              b_conv,
                                              features_size=4)
    ls_nodes = convolutional_layer.convolutional_layer(ls_nodes,
                                                       dict_ast_to_Node)
    max_pooling_layer = Max_pooling_layer()
    max_pooling_layer.max_pooling(ls_nodes)
    dynamic_pooling = Dynamic_pooling_layer()
    hidden_input = dynamic_pooling.three_way_pooling(ls_nodes, dict_sibling)

    return ls_nodes, hidden_input
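
Note: Examples #1-#4 all initialize the combination matrices w_comb1 and
w_comb2 the same way. A minimal, self-contained sketch of that pattern
(torch only; the size 20 mirrors the examples):

import torch

n = 20  # vector size used throughout these examples
# diagonal matrix with random entries, marked in-place as trainable
w_comb = torch.diag(torch.randn(n, dtype=torch.float32)).requires_grad_()
print(w_comb.shape)          # torch.Size([20, 20])
print(w_comb.requires_grad)  # True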
Example #2
def set_up_one_max_pooling_layer():
    path = os.path.join('test', 'generators')
    data = os.path.join(path, 'prueba.py')
    tree = file_parser(data)
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    ls_nodes = node_position_assign(ls_nodes)
    ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
    ls_nodes = leaves_nodes_assign(ls_nodes, dict_ast_to_Node)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node,
                                                 20, 0.1, 0.001, 0, 5)
    ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
    w_comb1 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    w_comb2 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    coding_layer = Coding_layer(20, w_comb1, w_comb2)
    ls_nodes = coding_layer.coding_layer(ls_nodes, dict_ast_to_Node, w_l, w_r,
                                         b_code)
    w_t = torch.randn(4, 20, requires_grad=True)
    w_r = torch.randn(4, 20, requires_grad=True)
    w_l = torch.randn(4, 20, requires_grad=True)
    b_conv = torch.randn(4, requires_grad=True)
    convolutional_layer = Convolutional_layer(20,
                                              w_t,
                                              w_r,
                                              w_l,
                                              b_conv,
                                              features_size=4)
    ls_nodes = convolutional_layer.convolutional_layer(ls_nodes,
                                                       dict_ast_to_Node)
    pooling_layer = Pooling_layer()
    pooled_tensor = pooling_layer.pooling_layer(ls_nodes)

    return pooled_tensor
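
Note: Pooling_layer's internals are not shown in these examples; a plausible
reading of "one-way pooling" is an element-wise max over the per-node feature
vectors. A self-contained sketch of that assumption (a stand-in, not the
repo's actual implementation):

import torch

# hypothetical per-node feature vectors (feature_size = 4, as in Example #2)
node_features = [torch.randn(4) for _ in range(7)]
pooled = torch.max(torch.stack(node_features), dim=0).values
print(pooled.shape)  # torch.Size([4])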
Example #3
    def __init__(self, n=20, m=4, pooling='one-way pooling'):
        self.vector_size = n
        self.feature_size = m
        # parameters
        self.w_comb1 = torch.diag(
            torch.randn(self.vector_size,
                        dtype=torch.float32)).requires_grad_()
        self.w_comb2 = torch.diag(
            torch.randn(self.vector_size,
                        dtype=torch.float32)).requires_grad_()
        self.w_t = torch.randn(self.feature_size,
                               self.vector_size,
                               requires_grad=True)
        self.w_r = torch.randn(self.feature_size,
                               self.vector_size,
                               requires_grad=True)
        self.w_l = torch.randn(self.feature_size,
                               self.vector_size,
                               requires_grad=True)
        self.b_conv = torch.randn(self.feature_size, requires_grad=True)
        # pooling method
        self.pooling = pooling
        if self.pooling == 'three-way pooling':
            self.w_hidden = torch.randn(3, requires_grad=True)
            self.b_hidden = torch.randn(1, requires_grad=True)
            self.dynamic = Dynamic_pooling_layer()
            self.max_pool = Max_pooling_layer()
        else:
            self.w_hidden = torch.randn(self.feature_size, requires_grad=True)
            self.b_hidden = torch.randn(1, requires_grad=True)
            # keep the pooling mode string intact; store the layer separately
            self.pooling_layer = Pooling_layer()
        # layers
        self.cod = Coding_layer(self.vector_size, self.w_comb1, self.w_comb2)
        self.conv = Convolutional_layer(self.vector_size,
                                        self.w_t,
                                        self.w_r,
                                        self.w_l,
                                        self.b_conv,
                                        features_size=self.feature_size)
        self.hidden = Hidden_layer(self.w_hidden, self.b_hidden)
Example #4
def set_up_hidden_layer():
    path = os.path.join('test', 'generators')
    data = os.path.join(path, 'prueba.py')
    tree = file_parser(data)
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    ls_nodes = node_position_assign(ls_nodes)
    ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
    ls_nodes = leaves_nodes_assign(ls_nodes, dict_ast_to_Node)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node,
                                                 20, 0.1, 0.001, 0, 5)
    ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
    w_comb1 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    w_comb2 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    coding_layer = Coding_layer(20, w_comb1, w_comb2)
    ls_nodes = coding_layer.coding_layer(ls_nodes, dict_ast_to_Node, w_l, w_r,
                                         b_code)
    w_t = torch.randn(4, 20, requires_grad=True)
    w_r = torch.randn(4, 20, requires_grad=True)
    w_l = torch.randn(4, 20, requires_grad=True)
    b_conv = torch.randn(4, requires_grad=True)
    convolutional_layer = Convolutional_layer(20,
                                              w_t,
                                              w_r,
                                              w_l,
                                              b_conv,
                                              features_size=4)
    ls_nodes = convolutional_layer.convolutional_layer(ls_nodes,
                                                       dict_ast_to_Node)
    max_pooling_layer = Max_pooling_layer()
    max_pooling_layer.max_pooling(ls_nodes)
    dynamic_pooling = Dynamic_pooling_layer()
    hidden_input = dynamic_pooling.three_way_pooling(ls_nodes, dict_sibling)
    w_hidden = torch.randn(3, requires_grad=True)
    b_hidden = torch.randn(1, requires_grad=True)
    hidden = Hidden_layer(w_hidden, b_hidden)
    output_hidden = hidden.hidden_layer(hidden_input)

    return output_hidden, w_hidden, b_hidden
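
Note: the shapes in Example #4 (w_hidden of size 3 matching the three-way
pooled vector, b_hidden of size 1) suggest the hidden layer is an affine map
to a single score. A hedged, self-contained sketch of that reading (a
stand-in, not the repo's actual Hidden_layer):

import torch

w_hidden = torch.randn(3, requires_grad=True)
b_hidden = torch.randn(1, requires_grad=True)
hidden_input = torch.randn(3)  # stand-in for the three-way pooled vector

output_hidden = torch.dot(w_hidden, hidden_input) + b_hidden
print(output_hidden.shape)  # torch.Size([1])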
Example #5
    def __init__(self, n=30, m=100, pooling='one-way pooling'):
        self.vector_size = n
        self.feature_size = m
        # parameters
        w_comb1 = numpy.genfromtxt("params\\w_comb1.csv", delimiter=",")
        self.w_comb1 = torch.tensor(w_comb1, dtype=torch.float32)
        w_comb2 = numpy.genfromtxt("params\\w_comb2.csv", delimiter=",")
        self.w_comb2 = torch.tensor(w_comb2, dtype=torch.float32)
        w_t = numpy.genfromtxt("params\\w_t.csv", delimiter=",")
        self.w_t = torch.tensor(w_t, dtype=torch.float32)
        w_r = numpy.genfromtxt("params\\w_r.csv", delimiter=",")
        self.w_r = torch.tensor(w_r, dtype=torch.float32)
        w_l = numpy.genfromtxt("params\\w_l.csv", delimiter=",")
        self.w_l = torch.tensor(w_l, dtype=torch.float32)
        b_conv = numpy.genfromtxt("params\\b_conv.csv", delimiter=",")
        self.b_conv = torch.tensor(b_conv, dtype=torch.float32)
        w_hidden = numpy.genfromtxt("params\\w_hidden.csv", delimiter=",")
        self.w_hidden = torch.tensor(w_hidden, dtype=torch.float32)
        b_hidden = numpy.genfromtxt("params\\b_hidden.csv", delimiter=",")
        self.b_hidden = torch.tensor(b_hidden, dtype=torch.float32)

        # pooling method
        self.pooling = pooling
        if self.pooling == 'one-way pooling':
            self.pooling_layer = Pooling_layer()
        else:
            self.dynamic = Dynamic_pooling_layer()
            self.max_pool = Max_pooling_layer()

        ### Layers
        self.cod = Coding_layer(self.vector_size, self.w_comb1, self.w_comb2)
        self.conv = Convolutional_layer(self.vector_size,
                                        self.w_t,
                                        self.w_r,
                                        self.w_l,
                                        self.b_conv,
                                        features_size=self.feature_size)
        self.hidden = Hidden_layer(self.w_hidden, self.b_hidden)
Example #6
class SecondNeuralNetwork():
    def __init__(self, n=20, m=4, pooling='one-way pooling'):
        self.vector_size = n
        self.feature_size = m
        # parameters
        self.w_comb1 = torch.diag(
            torch.randn(self.vector_size,
                        dtype=torch.float32)).requires_grad_()
        self.w_comb2 = torch.diag(
            torch.randn(self.vector_size,
                        dtype=torch.float32)).requires_grad_()
        self.w_t = torch.randn(self.feature_size,
                               self.vector_size,
                               requires_grad=True)
        self.w_r = torch.randn(self.feature_size,
                               self.vector_size,
                               requires_grad=True)
        self.w_l = torch.randn(self.feature_size,
                               self.vector_size,
                               requires_grad=True)
        self.b_conv = torch.randn(self.feature_size, requires_grad=True)
        # pooling method
        self.pooling = pooling
        if self.pooling == 'three-way pooling':
            self.w_hidden = torch.randn(3, requires_grad=True)
            self.b_hidden = torch.randn(1, requires_grad=True)
            self.dynamic = Dynamic_pooling_layer()
            self.max_pool = Max_pooling_layer()
        else:
            self.w_hidden = torch.randn(self.feature_size, requires_grad=True)
            self.b_hidden = torch.randn(1, requires_grad=True)
            # keep the pooling mode string intact; store the layer separately
            self.pooling_layer = Pooling_layer()
        # layers
        self.cod = Coding_layer(self.vector_size, self.w_comb1, self.w_comb2)
        self.conv = Convolutional_layer(self.vector_size,
                                        self.w_t,
                                        self.w_r,
                                        self.w_l,
                                        self.b_conv,
                                        features_size=self.feature_size)
        self.hidden = Hidden_layer(self.w_hidden, self.b_hidden)

    def train(self,
              targets,
              training_dict,
              total_epochs=10,
              learning_rate=0.1):
        """Create the training loop"""
        # Construct the optimizer
        params = [
            self.w_comb1, self.w_comb2, self.w_t, self.w_l, self.w_r,
            self.b_conv, self.w_hidden, self.b_hidden
        ]
        optimizer = torch.optim.SGD(params, lr=learning_rate)
        criterion = nn.BCELoss()
        print('The target values of the files are: ', targets)

        for epoch in range(total_epochs):
            # Time
            start = time()
            outputs = self.forward(training_dict)

            try:
                loss = criterion(outputs, targets)
            except AttributeError:
                print(
                    f'The size of outputs is {len(outputs)} and is of type {type(outputs)}'
                )
                print('Check that the path is a folder and not a file')
                raise

            print('outputs: \n', outputs)

            # zero the parameter gradients
            optimizer.zero_grad()

            # Calculates the derivative
            loss.backward(retain_graph=True)

            # Update parameters
            optimizer.step()  #w_r = w_r - lr * w_r.grad

            # Time
            end = time()

            print('Epoch: ', epoch, ', Time: ', end - start, ', Loss: ', loss)

        message = f'''
The loss we have for the training network is: {loss}
        '''
        writer(message)
        self.save()

    def forward(self, training_dict):
        outputs = []
        # sigmoid squashes each file's score into a [0, 1] probability
        sigmoid = nn.Sigmoid()
        for filepath in training_dict:
            # forward pass (layer calculations) for one file
            output = self.layers(training_dict[filepath])
            outputs.append(sigmoid(output))

        # concatenate the per-file predictions into one tensor
        return torch.cat(outputs, 0)

    def layers(self, vector_representation_params):
        (ls_nodes, dict_ast_to_Node, dict_sibling, w_l_code, w_r_code,
         b_code) = vector_representation_params
        ls_nodes = self.cod.coding_layer(ls_nodes, dict_ast_to_Node, w_l_code,
                                         w_r_code, b_code)
        ls_nodes = self.conv.convolutional_layer(ls_nodes, dict_ast_to_Node)
        if self.pooling == 'three-way pooling':
            self.max_pool.max_pooling(ls_nodes)
            vector = self.dynamic.three_way_pooling(ls_nodes, dict_sibling)
        else:
            vector = self.pooling_layer.pooling_layer(ls_nodes)
        output = self.hidden.hidden_layer(vector)

        return output

    def save(self):
        '''Save all the trained parameters into a csv file'''
        # save w_comb1 into csv file
        w_comb1 = self.w_comb1.detach().numpy()
        numpy.savetxt("params/w_comb1.csv", w_comb1, delimiter=",")

        # save w_comb2 into csv file
        w_comb2 = self.w_comb2.detach().numpy()
        numpy.savetxt("params/w_comb2.csv", w_comb2, delimiter=",")

        # save w_t into csv file
        w_t = self.w_t.detach().numpy()
        numpy.savetxt("params/w_t.csv", w_t, delimiter=",")

        # save w_l into csv file
        w_l = self.w_l.detach().numpy()
        numpy.savetxt("params/w_l.csv", w_l, delimiter=",")

        # save w_r into csv file
        w_r = self.w_r.detach().numpy()
        numpy.savetxt("params/w_r.csv", w_r, delimiter=",")

        # save b_conv into csv file
        b_conv = self.b_conv.detach().numpy()
        numpy.savetxt("params/b_conv.csv", b_conv, delimiter=",")

        # save w_hidden into csv file
        w_hidden = self.w_hidden.detach().numpy()
        numpy.savetxt("params/w_hidden.csv", w_hidden, delimiter=",")

        # save b_hidden into csv file
        b_hidden = self.b_hidden.detach().numpy()
        numpy.savetxt("params/b_hidden.csv", b_hidden, delimiter=",")
Example #7
class Validation_neural_network():
    def __init__(self, n=30, m=100, pooling='one-way pooling'):
        self.vector_size = n
        self.feature_size = m
        # parameters
        w_comb1 = numpy.genfromtxt("params\\w_comb1.csv", delimiter=",")
        self.w_comb1 = torch.tensor(w_comb1, dtype=torch.float32)
        w_comb2 = numpy.genfromtxt("params\\w_comb2.csv", delimiter=",")
        self.w_comb2 = torch.tensor(w_comb2, dtype=torch.float32)
        w_t = numpy.genfromtxt("params\\w_t.csv", delimiter=",")
        self.w_t = torch.tensor(w_t, dtype=torch.float32)
        w_r = numpy.genfromtxt("params\\w_r.csv", delimiter=",")
        self.w_r = torch.tensor(w_r, dtype=torch.float32)
        w_l = numpy.genfromtxt("params\\w_l.csv", delimiter=",")
        self.w_l = torch.tensor(w_l, dtype=torch.float32)
        b_conv = numpy.genfromtxt("params\\b_conv.csv", delimiter=",")
        self.b_conv = torch.tensor(b_conv, dtype=torch.float32)
        w_hidden = numpy.genfromtxt("params\\w_hidden.csv", delimiter=",")
        self.w_hidden = torch.tensor(w_hidden, dtype=torch.float32)
        b_hidden = numpy.genfromtxt("params\\b_hidden.csv", delimiter=",")
        self.b_hidden = torch.tensor(b_hidden, dtype=torch.float32)

        # pooling method
        self.pooling = pooling
        if self.pooling == 'one-way pooling':
            self.pooling_layer = Pooling_layer()
        else:
            self.dynamic = Dynamic_pooling_layer()
            self.max_pool = Max_pooling_layer()

        ### Layers
        self.cod = Coding_layer(self.vector_size, self.w_comb1, self.w_comb2)
        self.conv = Convolutional_layer(self.vector_size,
                                        self.w_t,
                                        self.w_r,
                                        self.w_l,
                                        self.b_conv,
                                        features_size=self.feature_size)
        self.hidden = Hidden_layer(self.w_hidden, self.b_hidden)

    def validation(self,
                   targets,
                   validation_dict,
                   learning_rate=0.3,
                   momentum=0,
                   l2_penalty=0,
                   epoch_first=45):
        """Create the validation loop"""
        print('########################################')
        print(
            '\n\n\nFinished training process. Entering validation process\n\n\n'
        )
        print("The correct value of the files is: ", targets)

        # We calculate the predictions
        predicts = self.prediction(validation_dict, learning_rate, momentum,
                                   l2_penalty, epoch_first)
        # print the predictions
        print('predictions: \n', predicts)

        # Loss function
        criterion = nn.BCELoss()
        loss = criterion(predicts, targets)

        # TODO Build the accuracy evaluation method for each file
        # Confusion matrix
        conf_matrix = self.conf_matrix(predicts, targets)
        print(conf_matrix)
        plot_confusion_matrix(conf_matrix, ['no generator', 'generator'])

        message = f'''

For the validation set we have the following results:
loss: {loss}
confusion_matrix:
{conf_matrix}
        '''
        writer(message)
        print('Loss validation: ', loss)
        # correct += (predicted == labels).sum()
        accuracy = self.accuracy(predicts, targets)
        print('accuracy: ', accuracy)

    def prediction(self, validation_dict, learning_rate, momentum, l2_penalty,
                   epoch_first):
        outputs = []
        # sigmoid squashes each file's score into a [0, 1] probability
        sigmoid = nn.Sigmoid()
        total = len(validation_dict)
        i = 1
        for filepath in validation_dict:
            # first neural network: vector representation of the file
            validation_dict[filepath] = self.first_neural_network(
                filepath, learning_rate, momentum, l2_penalty, epoch_first)
            print(
                f"finished vector representation of file: {filepath} ({i}/{total}) \n"
            )
            i += 1
            # forward pass through the second neural network
            output = self.second_neural_network(validation_dict[filepath])
            # reshape to a one-element tensor so predictions can be concatenated
            outputs.append(sigmoid(output).reshape(1))

        return torch.cat(outputs, 0)

    def first_neural_network(self, file, learning_rate, momentum, l2_penalty,
                             epoch):
        '''Initialize the node list, the AST-to-Node dict and the sibling dict'''
        # we parse the data of the file into a tree
        tree = file_parser(file)
        # convert its nodes into the Node class we have, and assign their attributes
        ls_nodes, dict_ast_to_Node = node_object_creator(tree)
        ls_nodes = node_position_assign(ls_nodes)
        ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
        ls_nodes = leaves_nodes_assign(ls_nodes, dict_ast_to_Node)

        # Initializing vector embeddings
        embed = Embedding(self.vector_size, ls_nodes, dict_ast_to_Node)
        ls_nodes = embed.node_embedding()

        # Calculate the vector representation for each node
        vector_representation = First_neural_network(ls_nodes,
                                                     dict_ast_to_Node,
                                                     self.vector_size,
                                                     learning_rate, momentum,
                                                     l2_penalty, epoch)
        ls_nodes, w_l_code, w_r_code, b_code = \
            vector_representation.vector_representation()

        return [
            ls_nodes, dict_ast_to_Node, dict_sibling, w_l_code, w_r_code,
            b_code
        ]

    def second_neural_network(self, vector_representation_params):
        (ls_nodes, dict_ast_to_Node, dict_sibling, w_l_code, w_r_code,
         b_code) = vector_representation_params
        ls_nodes = self.cod.coding_layer(ls_nodes, dict_ast_to_Node, w_l_code,
                                         w_r_code, b_code)
        ls_nodes = self.conv.convolutional_layer(ls_nodes, dict_ast_to_Node)
        if self.pooling == 'one-way pooling':
            vector = self.pooling_layer.pooling_layer(ls_nodes)
        else:
            self.max_pool.max_pooling(ls_nodes)
            vector = self.dynamic.three_way_pooling(ls_nodes, dict_sibling)
        output = self.hidden.hidden_layer(vector)

        return output

    def accuracy(self, predicts, targets):
        with torch.no_grad():
            rounded_prediction = torch.round(predicts)

        # 1 if false negative
        # -1 if false positive
        difference = targets - rounded_prediction
        errors = torch.abs(difference).sum()

        accuracy = (len(difference) - errors) / len(difference)

        return accuracy

    def conf_matrix(self, predicts, targets):
        with torch.no_grad():
            rounded_prediction = torch.round(predicts)

        # 1 if false negative
        # -1 if false positive
        difference = targets - rounded_prediction

        # 0 if true negative
        # 2 if true positive
        addition = targets + rounded_prediction

        conf_matrix = torch.zeros(2, 2, dtype=torch.int64)
        # rows are true values; columns are predictions
        for i in range(len(addition)):
            if difference[i] == 1:
                conf_matrix[1, 0] += 1
            elif difference[i] == -1:
                conf_matrix[0, 1] += 1
            elif addition[i] == 0:
                conf_matrix[0, 0] += 1
            else:
                assert addition[i] == 2
                conf_matrix[1, 1] += 1

        return conf_matrix.numpy()
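
Note: the bookkeeping in conf_matrix (and accuracy) above can be traced with
made-up predictions and targets; the values below are purely illustrative:

import torch

targets = torch.tensor([1., 0., 1., 0.])
predicts = torch.tensor([0.9, 0.8, 0.2, 0.1])
rounded = torch.round(predicts)  # -> [1., 1., 0., 0.]

difference = targets - rounded   # 1 = false negative, -1 = false positive
addition = targets + rounded     # 0 = true negative,  2 = true positive

conf_matrix = torch.zeros(2, 2, dtype=torch.int64)
for i in range(len(addition)):
    if difference[i] == 1:
        conf_matrix[1, 0] += 1   # false negative
    elif difference[i] == -1:
        conf_matrix[0, 1] += 1   # false positive
    elif addition[i] == 0:
        conf_matrix[0, 0] += 1   # true negative
    else:
        conf_matrix[1, 1] += 1   # true positive

print(conf_matrix)  # tensor([[1, 1], [1, 1]]) for this toy input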