Example #1
def set_up_dynamic_pooling_layer():
    tree = file_parser('test/pruebas.py')
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    ls_nodes = node_position_assign(ls_nodes)
    ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node,
                                                 20, 0.1, 0.001)
    ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
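    # The coding layer is set up with two random 20x20 diagonal weight matrices
    # (w_comb1, w_comb2) plus the w_l, w_r, b_code weights learned above.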
    w_comb1 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    w_comb2 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    coding_layer = Coding_layer(20, w_comb1, w_comb2)
    ls_nodes = coding_layer.coding_layer(ls_nodes, dict_ast_to_Node, w_l, w_r,
                                         b_code)
    w_t = torch.randn(4, 20, requires_grad=True)
    w_r = torch.randn(4, 20, requires_grad=True)
    w_l = torch.randn(4, 20, requires_grad=True)
    b_conv = torch.randn(4, requires_grad=True)
    convolutional_layer = Convolutional_layer(20,
                                              w_t,
                                              w_r,
                                              w_l,
                                              b_conv,
                                              features_size=4)
    ls_nodes = convolutional_layer.convolutional_layer(ls_nodes,
                                                       dict_ast_to_Node)
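    # Max pooling is applied to each node's features, then three-way dynamic
    # pooling builds the hidden-layer input vector.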
    max_pooling_layer = Max_pooling_layer()
    max_pooling_layer.max_pooling(ls_nodes)
    dynamic_pooling = Dynamic_pooling_layer()
    hidden_input = dynamic_pooling.three_way_pooling(ls_nodes, dict_sibling)

    return ls_nodes, hidden_input
Example #2
    def first_neural_network(self, file, learning_rate, momentum, l2_penalty,
                             epoch):
        '''Initializing node list, dict list and dict sibling'''
        # we parse the data of the file into a tree
        tree = file_parser(file)
        # convert its nodes into the Node class we have, and assign their attributes
        ls_nodes, dict_ast_to_Node = node_object_creator(tree)
        ls_nodes = node_position_assign(ls_nodes)
        ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
        ls_nodes = leaves_nodes_assign(ls_nodes, dict_ast_to_Node)

        # Initializing vector embeddings
        embed = Embedding(self.vector_size, ls_nodes, dict_ast_to_Node)
        ls_nodes = embed.node_embedding()

        # Calculate the vector representation for each node
        vector_representation = First_neural_network(ls_nodes,
                                                     dict_ast_to_Node,
                                                     self.vector_size,
                                                     learning_rate, momentum,
                                                     l2_penalty, epoch)
        ls_nodes, w_l_code, w_r_code, b_code = vector_representation.vector_representation(
        )

        return [
            ls_nodes, dict_ast_to_Node, dict_sibling, w_l_code, w_r_code,
            b_code
        ]
Example #3
def set_up_matrix():
    tree = file_parser('test/pruebas.py')
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    matrices = MatrixGenerator(20, 10)
    return matrices
Example #4
def set_up_one_max_pooling_layer():
    path = os.path.join('test', 'generators')
    data = os.path.join(path, 'prueba.py')
    tree = file_parser(data)
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    ls_nodes = node_position_assign(ls_nodes)
    ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
    ls_nodes = leaves_nodes_assign(ls_nodes, dict_ast_to_Node)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node,
                                                 20, 0.1, 0.001, 0, 5)
    ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
    w_comb1 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    w_comb2 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    coding_layer = Coding_layer(20, w_comb1, w_comb2)
    ls_nodes = coding_layer.coding_layer(ls_nodes, dict_ast_to_Node, w_l, w_r,
                                         b_code)
    w_t = torch.randn(4, 20, requires_grad=True)
    w_r = torch.randn(4, 20, requires_grad=True)
    w_l = torch.randn(4, 20, requires_grad=True)
    b_conv = torch.randn(4, requires_grad=True)
    convolutional_layer = Convolutional_layer(20,
                                              w_t,
                                              w_r,
                                              w_l,
                                              b_conv,
                                              features_size=4)
    ls_nodes = convolutional_layer.convolutional_layer(ls_nodes,
                                                       dict_ast_to_Node)
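    # One-max pooling over the convolved node features produces the returned tensor.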
    pooling_layer = Pooling_layer()
    pooled_tensor = pooling_layer.pooling_layer(ls_nodes)

    return pooled_tensor
Example #5
def set_up_matrix():
    path = os.path.join('test', 'generators')
    data = os.path.join(path, 'prueba.py')
    tree = file_parser(data)
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    matrices = MatrixGenerator(20, 10)
    return matrices
Example #6
def build_model(options):
    print('Build model...')
    sys.stdout.flush()
    weights = None
    if options['flag_random_lookup_table'] == False: weights = options['embedding']
    embed_layer = Embedding(input_dim = options['embedding'].shape[0], 
                            output_dim = options['embedding'].shape[1], 
                            weights = weights)
    dense_layers = []
    dense_layers.append(Dense(input_dim = options['embedding'].shape[1] * 2, output_dim = options['size_hidden_layer'], activation = 'tanh'))
    dense_layers.append(Dense(input_dim = options['size_hidden_layer'], output_dim = 1, activation = 'sigmoid'))
    
    # for training
    sentence1 = T.imatrix('s1')  # sentence1, n_samples * len_sentence
    sentence1_mask = T.matrix('s1_mask')
    sentence2 = T.imatrix('s2')  # sentence2, n_samples * len_sentence
    sentence2_mask = T.matrix('s2_mask')
    y = T.ivector('y1')  # n_samples
    
    embed_s1 = embed_layer.get_output(sentence1) # n_samples * len_sentence * embed_dim
    embed_s2 = embed_layer.get_output(sentence2) # n_samples * len_sentence * embed_dim
    if options['sentence_modeling'] == 'CBoW':
        embed_s1 = ave_embed(embed_s1,sentence1_mask) # n_samples * embed_dim
        embed_s2 = ave_embed(embed_s2,sentence2_mask) # n_samples * embed_dim
    elif options['sentence_modeling'] == 'CNN':
        sentence_encode_layer = Convolution1D(input_dim = options['embedding'].shape[1], activation = 'tanh',
                                nb_filter = options['embedding'].shape[1], filter_length = options['CNN_filter_length'],
                                border_mode = 'same')
        embed_s1 = CNN_embed(embed_s1,sentence1_mask,sentence_encode_layer) # n_samples * embed_dim
        embed_s2 = CNN_embed(embed_s2,sentence2_mask,sentence_encode_layer) # n_samples * embed_dim
    elif options['sentence_modeling'] == 'LSTM':
        sentence_encode_layer = LSTM(input_dim = options['embedding'].shape[1], output_dim = options['embedding'].shape[1])
        embed_s1 = LSTM_embed(embed_s1,sentence1_mask,sentence_encode_layer,options) # n_samples * embed_dim
        embed_s2 = LSTM_embed(embed_s2,sentence2_mask,sentence_encode_layer,options) # n_samples * embed_dim
    else:
        print('Error: No model called %s available!' % options['sentence_modeling'])
        return
    
    output = T.concatenate([embed_s1,embed_s2],axis = -1) # n_samples * (embed_dim * 2)
    
    if options['flag_dropout'] == True:
        output = dropout(output, level=options['dropoutRates'])
    for dense_layer in dense_layers:
        output = dense_layer.get_output(output)
    f_pred = theano.function([sentence1,sentence1_mask,sentence2,sentence2_mask],output, allow_input_downcast=True)
    
    output = output.reshape((output.shape[0],))
    #y = y.reshape((output.shape[0],1))
    cost = T.nnet.binary_crossentropy(output, y).mean()
    f_debug = theano.function([sentence1,sentence1_mask,sentence2,sentence2_mask,y],[output,y,T.nnet.binary_crossentropy(output, y),cost], allow_input_downcast=True)
    tparams = []
    tparams += embed_layer.params
    if options['sentence_modeling'] != 'CBoW':
        tparams += sentence_encode_layer.params
    for dense_layer in dense_layers: tparams += dense_layer.params
    return sentence1,sentence1_mask,sentence2,sentence2_mask,y,cost,f_pred,tparams,f_debug
    
Example #7
def set_up_vector_representation():
    tree = file_parser('test/pruebas.py')
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node,
                                                 20, 0.1, 0.001)
    ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
    return ls_nodes, w_l, w_r, b_code
Example #8
def set_up_vector_representation():
    path = os.path.join('test', 'generators')
    data = os.path.join(path, 'prueba.py')
    tree = file_parser(data)
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    ls_nodes = leaves_nodes_assign(ls_nodes, dict_ast_to_Node)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node,
                                                 20, 0.1, 0.001, 0, 5)
    ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
    return ls_nodes, w_l, w_r, b_code
Example #9
def set_up_coding_layer():
    tree = file_parser('test/pruebas.py')
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node,
                                                 20, 0.1, 0.001)
    ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
    w_comb1 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    w_comb2 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    coding_layer = Coding_layer(20, w_comb1, w_comb2)
    ls_nodes = coding_layer.coding_layer(ls_nodes, dict_ast_to_Node, w_l, w_r,
                                         b_code)
    return ls_nodes, w_comb1, w_comb2
Example #10
def set_up_embeddings():
    path = os.path.join('test', 'generators')
    data = os.path.join(path, 'prueba.py')
    tree = file_parser(data)
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    return embed
Example #11
def set_up_coding_layer():
    path = os.path.join('test', 'generators')
    data = os.path.join(path, 'prueba.py')
    tree = file_parser(data)
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    ls_nodes = leaves_nodes_assign(ls_nodes, dict_ast_to_Node)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node,
                                                 20, 0.1, 0.001, 0, 5)
    ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
    w_comb1 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    w_comb2 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    coding_layer = Coding_layer(20, w_comb1, w_comb2)
    ls_nodes = coding_layer.coding_layer(ls_nodes, dict_ast_to_Node, w_l, w_r,
                                         b_code)
    return ls_nodes, w_comb1, w_comb2
Example #12
def set_up_hidden_layer():
    path = os.path.join('test', 'generators')
    data = os.path.join(path, 'prueba.py')
    tree = file_parser(data)
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    ls_nodes = node_position_assign(ls_nodes)
    ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
    ls_nodes = leaves_nodes_assign(ls_nodes, dict_ast_to_Node)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    ls_nodes = embed.node_embedding()[:]
    vector_representation = First_neural_network(ls_nodes, dict_ast_to_Node,
                                                 20, 0.1, 0.001, 0, 5)
    ls_nodes, w_l, w_r, b_code = vector_representation.vector_representation()
    w_comb1 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    w_comb2 = torch.diag(torch.randn(20, dtype=torch.float32)).requires_grad_()
    coding_layer = Coding_layer(20, w_comb1, w_comb2)
    ls_nodes = coding_layer.coding_layer(ls_nodes, dict_ast_to_Node, w_l, w_r,
                                         b_code)
    w_t = torch.randn(4, 20, requires_grad=True)
    w_r = torch.randn(4, 20, requires_grad=True)
    w_l = torch.randn(4, 20, requires_grad=True)
    b_conv = torch.randn(4, requires_grad=True)
    convolutional_layer = Convolutional_layer(20,
                                              w_t,
                                              w_r,
                                              w_l,
                                              b_conv,
                                              features_size=4)
    ls_nodes = convolutional_layer.convolutional_layer(ls_nodes,
                                                       dict_ast_to_Node)
    max_pooling_layer = Max_pooling_layer()
    max_pooling_layer.max_pooling(ls_nodes)
    dynamic_pooling = Dynamic_pooling_layer()
    hidden_input = dynamic_pooling.three_way_pooling(ls_nodes, dict_sibling)
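    # The hidden layer maps the 3-element pooled vector to a single output using
    # randomly initialized weights (w_hidden, b_hidden).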
    w_hidden = torch.randn(3, requires_grad=True)
    b_hidden = torch.randn(1, requires_grad=True)
    hidden = Hidden_layer(w_hidden, b_hidden)
    output_hidden = hidden.hidden_layer(hidden_input)

    return output_hidden, w_hidden, b_hidden
Example #13
def first_neural_network(training_dict,
                         vector_size=20,
                         learning_rate=0.1,
                         momentum=0.01,
                         l2_penalty=0,
                         epoch=45):
    total = len(training_dict)
    i = 1
    for data in training_dict:
        # Initializing node list, dict list and dict sibling

        # we parse the data of the file into a tree
        tree = file_parser(data)
        # convert its nodes into the Node class we have, and assign their attributes
        ls_nodes, dict_ast_to_Node = node_object_creator(tree)
        ls_nodes = node_position_assign(ls_nodes)
        ls_nodes, dict_sibling = node_sibling_assign(ls_nodes)
        ls_nodes = leaves_nodes_assign(ls_nodes, dict_ast_to_Node)

        # Initializing vector embeddings
        embed = Embedding(vector_size, ls_nodes, dict_ast_to_Node)
        ls_nodes = embed.node_embedding()

        # Calculate the vector representation for each node
        vector_representation = First_neural_network(ls_nodes,
                                                     dict_ast_to_Node,
                                                     vector_size,
                                                     learning_rate, momentum,
                                                     l2_penalty, epoch)
        ls_nodes, w_l_code, w_r_code, b_code = vector_representation.vector_representation(
        )

        training_dict[data] = [
            ls_nodes, dict_ast_to_Node, dict_sibling, w_l_code, w_r_code,
            b_code
        ]
        print(f"finished vector representation of file: {data} ({i}/{total})")
        i += 1
    return training_dict
Example #14
def load_word_embeddings(filename):
    """Load word embeddings from a .txt file.
    """
    if not filename.endswith('.npz'):
        msg = 'Pretrained word embeddings must be in .npz'
        msg += ' Received ' + filename
        raise ValueError(msg)
    data = dict(np.load(filename))
    return {
        'E': Embedding(data['W']),
        'idx2word': data['idx2word'],
        'word2idx': {word: i
                     for i, word in enumerate(data['idx2word'])}
    }
Example #15
    def __init__(self, config):
        super().__init__()
        self.config = config

        self.embedding = Embedding(self.config.num_words,
                                   self.config.dim_word,
                                   fix_word_embed=self.config.fix_word_embed)
        dim_word = self.config.dim_word

        self.rnn = self.rnn_factory(self.config.rnn_type,
                                    input_size=dim_word,
                                    hidden_size=self.config.rnn_hidden_size,
                                    num_layers=self.config.num_layers,
                                    dropout=self.config.dropout,
                                    bidirectional=self.config.bidirectional,
                                    bias=True,
                                    batch_first=True)

        self.dropout = nn.Dropout(self.config.dropout)
        self.init_weights()
Example #16
def load_subword_embeddings(filename):
    """Load subword embeddings from a .npz file.
    """
    if not filename.endswith('.npz'):
        msg = 'Pretrained word embeddings must be in .npz'
        msg += ' Received ' + filename
        raise ValueError(msg)
    data = dict(np.load(filename))
    N = data['seqs'].shape[0]
    D = data['W'].shape[1]
    W = np.r_[np.zeros((1, D)), data['W']]
    seqs = [[v + 1 for v in seq] for seq in data['seqs']]  # 0=PAD
    idx2id = [0] + list(data['idx2id'])  # 0=PAD
    return {
        'E': Embedding(W),
        'F': SubwordEmbedding(D, n_layers=0),
        'vecs': torch.FloatTensor(np.empty((N, D))),
        'seqs': torch.LongTensor(pad(seqs)),
        'idx2id': idx2id,
        'id2idx': {i: idx
                   for idx, i in enumerate(idx2id)}
    }
Example #17
import sys
sys.path.append(
    "/home/ubuntu/Files/Restaurant_Reviews/Restaurant-Reviews-Sentiment-Analysis"
)

import numpy as np
from numpy import array, amax, amin, sum
import pickle
from scipy.spatial.distance import cdist

from preprocess import clean
clean_obj = clean()

from embeddings import Embedding
Embedding_obj = Embedding()

# load the model from disk
filename = '/home/ubuntu/Files/Restaurant_Reviews/Restaurant-Reviews-Sentiment-Analysis/ML_model_trained_weights/naive_bayes_model.sav'  # (Acc. 91.5 % but lower response time, ~220 ms)
loaded_model = pickle.load(open(filename, 'rb'))

# Loading the list containing 1000x768 BERT embeddings
filename_2 = '/home/ubuntu/Files/Restaurant_Reviews/Restaurant-Reviews-Sentiment-Analysis/Train_data_BERT_Embeddings/RR_Positive_Train_data_Bert_embeddings.sav'
filename_3 = '/home/ubuntu/Files/Restaurant_Reviews/Restaurant-Reviews-Sentiment-Analysis/Train_data_BERT_Embeddings/RR_Negative_Train_data_Bert_embeddings.sav'

RR_Positive_dataset_emd = pickle.load(open(filename_2, 'rb'))
RR_Negative_dataset_emd = pickle.load(open(filename_3, 'rb'))


class Restaurant_Reviews:
    def predict(self, df):
        sample_data = df.iloc[0, :]
Example #18
    maxDsLength = 200
    n_datasets = len(datasets)
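    # Accumulate per-file datasets and yield them as one merged batch once
    # maxDsLength of them have been collected.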
    for i in range(len(datasets)):
        X, y = generateDatasetFromString(datasets[i], seq_length, embedding)
        keras_ds_x.append(X)
        keras_ds_y.append(y)

        if (len(keras_ds_x) >= maxDsLength):
            # merge and yield
            Xconcat = np.concatenate(keras_ds_x)
            Yconcat = np.concatenate(keras_ds_y)

            yield Xconcat, Yconcat, n_datasets
            keras_ds_x = []
            keras_ds_y = []

    if (len(keras_ds_x) > 0):
        # merge and yield
        Xconcat = np.concatenate(keras_ds_x)
        Yconcat = np.concatenate(keras_ds_y)
        keras_ds_x = []
        keras_ds_y = []
        yield Xconcat, Yconcat, n_datasets


if __name__ == "__main__":
    embedding = Embedding("Norsk_embeddings")
    for ds in generateDatasetFromTokenizedDataset(
            "data/tokenized/mftd_norwegian", 5, embedding):
        print(ds)
Example #19
from preprocessing import generateDatasetFromString
from gensim.models import KeyedVectors
from embeddings import Embedding
import os
import numpy as np
import gc

seqLen = 10
embedding = Embedding('preprocess_data/outofvocabonly')
basepath = os.path.normpath(os.path.realpath(__file__))
while os.path.basename(basepath) != "Minerva":
    basepath = os.path.dirname(basepath)

directory = os.path.normpath(
    os.path.join(basepath, "data/clean/mftd_norwegian"))
directoryOut = os.path.normpath(
    os.path.join(basepath, "data/tokenized/mftd_norwegian"))

seenWords = set()
wordList = []
xs = []
ys = []
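# Build the vocabulary: collect every distinct token across the cleaned files,
# preserving first-seen order.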
for filename in os.listdir(directory):
    with open(directory + "/" + filename, 'r') as file:
        data = file.read().split()
        for word in data:
            if word not in seenWords:
                seenWords.add(word)
                wordList.append(word)

word2index = {k: i for i, k in enumerate(wordList)}
Example #20
def build_model(options):
    print('Build model...')
    sys.stdout.flush()
    weights = None
    if options['flag_random_lookup_table'] == False:
        weights = options['embedding']
    embed_layer = Embedding(input_dim=options['embedding'].shape[0],
                            output_dim=options['embedding'].shape[1],
                            weights=weights)
    dense_layers = []
    dense_layers.append(
        Dense(input_dim=options['embedding'].shape[1] * 2,
              output_dim=options['size_hidden_layer'],
              activation='tanh'))
    dense_layers.append(
        Dense(input_dim=options['size_hidden_layer'],
              output_dim=1,
              activation='sigmoid'))

    # for training
    sentence1 = T.imatrix('s1')  # sentence1, n_samples * len_sentence
    sentence1_mask = T.matrix('s1_mask')
    sentence2 = T.imatrix('s2')  # sentence2, n_samples * len_sentence
    sentence2_mask = T.matrix('s2_mask')
    y = T.ivector('y1')  # n_samples

    embed_s1 = embed_layer.get_output(
        sentence1)  # n_samples * len_sentence * embed_dim
    embed_s2 = embed_layer.get_output(
        sentence2)  # n_samples * len_sentence * embed_dim
    if options['sentence_modeling'] == 'CBoW':
        embed_s1 = ave_embed(embed_s1, sentence1_mask)  # n_samples * embed_dim
        embed_s2 = ave_embed(embed_s2, sentence2_mask)  # n_samples * embed_dim
    elif options['sentence_modeling'] == 'CNN':
        sentence_encode_layer = Convolution1D(
            input_dim=options['embedding'].shape[1],
            activation='tanh',
            nb_filter=options['embedding'].shape[1],
            filter_length=options['CNN_filter_length'],
            border_mode='same')
        embed_s1 = CNN_embed(embed_s1, sentence1_mask,
                             sentence_encode_layer)  # n_samples * embed_dim
        embed_s2 = CNN_embed(embed_s2, sentence2_mask,
                             sentence_encode_layer)  # n_samples * embed_dim
    elif options['sentence_modeling'] == 'LSTM':
        sentence_encode_layer = LSTM(input_dim=options['embedding'].shape[1],
                                     output_dim=options['embedding'].shape[1])
        embed_s1 = LSTM_embed(embed_s1, sentence1_mask, sentence_encode_layer,
                              options)  # n_samples * embed_dim
        embed_s2 = LSTM_embed(embed_s2, sentence2_mask, sentence_encode_layer,
                              options)  # n_samples * embed_dim
    else:
        print('Error: No model called %s available!' %
              options['sentence_modeling'])
        return

    output = T.concatenate([embed_s1, embed_s2],
                           axis=-1)  # n_samples * (embed_dim * 2)

    if options['flag_dropout'] == True:
        output = dropout(output, level=options['dropoutRates'])
    for dense_layer in dense_layers:
        output = dense_layer.get_output(output)
    f_pred = theano.function(
        [sentence1, sentence1_mask, sentence2, sentence2_mask],
        output,
        allow_input_downcast=True)

    output = output.reshape((output.shape[0], ))
    #y = y.reshape((output.shape[0],1))
    cost = T.nnet.binary_crossentropy(output, y).mean()
    f_debug = theano.function(
        [sentence1, sentence1_mask, sentence2, sentence2_mask, y],
        [output, y, T.nnet.binary_crossentropy(output, y), cost],
        allow_input_downcast=True)
    tparams = []
    tparams += embed_layer.params
    if options['sentence_modeling'] != 'CBoW':
        tparams += sentence_encode_layer.params
    for dense_layer in dense_layers:
        tparams += dense_layer.params
    return sentence1, sentence1_mask, sentence2, sentence2_mask, y, cost, f_pred, tparams, f_debug
Example #21
max_len = 256
model_dim = 64
batch_size = 128
epochs = 10

print("Data downloading and pre-processing ... ")
(x_train, y_train), (x_test, y_test) = imdb.load_data(maxlen=max_len,
                                                      num_words=vocab_size)
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_len)
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

print('Model building ... ')
inputs = Input(shape=(max_len, ), name="inputs")
embeddings = Embedding(vocab_size, model_dim, scale=False)(inputs)
outputs = BiDirectional(GRU(model_dim, return_outputs=True))(embeddings)
x = GlobalAveragePooling1D()(outputs)
x = Dropout(0.2)(x)
x = Dense(10, activation='relu')(x)
outputs = Dense(2, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer=Adam(beta_1=0.9, beta_2=0.98, epsilon=1e-9),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

print("Model Training ... ")
es = EarlyStopping(patience=5)
model.fit(x_train,
          y_train,
Example #22
def set_up_embeddings():
    tree = file_parser('test/pruebas.py')
    ls_nodes, dict_ast_to_Node = node_object_creator(tree)
    embed = Embedding(20, ls_nodes, dict_ast_to_Node)
    return embed
Example #23
with open(sys.argv[1], 'r') as file:
    config = json.loads(file.read())

import numpy as np
from embeddings import Embedding
from model import LSTMModel
import os

basepath = os.path.normpath(os.path.realpath(__file__))
while os.path.basename(basepath) != "Minerva":
    basepath = os.path.dirname(basepath)

embeddingPath = os.path.join(
    basepath, "LSTM/preprocess_data/{}".format("outofvocabonly"))

embedding = Embedding(embeddingPath)
embeddingMatrix = os.path.join(
    basepath, "data/tokenized/mftd_norwegian/embeddingMatrix.npy")
seq_length = config["seq_length"]
model = LSTMModel(seq_length, embeddingMatrix, config["layers"],
                  config["dropout_layers"], config["action"], embedding)

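# Optionally enable checkpointing and restore the checkpoint with the lowest
# recorded loss before running the configured action.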
if (config["use-checkpoint"]):
    checkpoint_dir = "checkpoints/" + config["checkpoint_dir"]
    checkpoint_name = "lstm_basic_embedding"
    if config["action"] == "train":
        model.setCheckpoint(checkpoint_dir, checkpoint_name,
                            config["checkpoint_interval"])
    model.loadCheckpointWithLowestLoss(checkpoint_dir, checkpoint_name)

if config["action"] == "train":