Example #1
x_test = test_data
y_test = test_labels
x_train_de = de_train_data
y_train_de = de_train_labels
x_val_de = de_dev_data
y_val_de = de_dev_labels
x_test_de = de_test_data
y_test_de = de_test_labels

# tests
# print(x_train[:3])
# print(x_test[:3])

EMBEDDING_DIM = 100

embeddings_index = utils.load_embs_2_dict('EMBEDDINGS/EN_ES.txt.w2v')
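
# utils is not shown in these snippets. A minimal sketch of what
# load_embs_2_dict presumably does, assuming a plain-text word2vec/GloVe
# layout (token followed by its vector, one entry per line); everything
# beyond the name and signature seen above is an assumption:
import numpy as np

def load_embs_2_dict(path, dim=100):
    """Read a text embedding file into a {word: np.ndarray} dict."""
    embeddings_index = {}
    with open(path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            if len(parts) != dim + 1:  # skip a word2vec count header or malformed rows
                continue
            embeddings_index[parts[0]] = np.asarray(parts[1:], dtype='float32')
    return embeddings_index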

embedding_matrix = utils.build_emb_matrix(num_embedding_vocab=vocab_size,
                                          embedding_dim=EMBEDDING_DIM,
                                          word_index=tokenizer.word_index,
                                          embeddings_index=embeddings_index)
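
# A matching sketch of build_emb_matrix under the same assumptions: row i of
# the matrix holds the pretrained vector for the word the tokenizer maps to
# index i; row 0 (padding) and out-of-vocabulary words stay all-zero.
import numpy as np

def build_emb_matrix(num_embedding_vocab, embedding_dim, word_index,
                     embeddings_index):
    matrix = np.zeros((num_embedding_vocab, embedding_dim))
    for word, idx in word_index.items():
        if idx >= num_embedding_vocab:
            continue
        vector = embeddings_index.get(word)
        if vector is not None:
            matrix[idx] = vector
    return matrix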

global_en_mic_train = 0
global_de_mic_train = 0
global_en_mac_train = 0
global_de_mac_train = 0
global_en_mic_tune = 0
global_de_mic_tune = 0
global_en_mac_tune = 0
global_de_mac_tune = 0
num_iterations = 8
Example #2
x_test_es = es_test_data
y_test_es = es_test_labels
x_test_hu = hu_test_data
y_test_hu = hu_test_labels
x_test_sk = sk_test_data
y_test_sk = sk_test_labels
x_test_sv = sv_test_data
y_test_sv = sv_test_labels
x_test_it = it_test_data
y_test_it = it_test_labels
x_test_pt = pt_test_data
y_test_pt = pt_test_labels

EMBEDDING_DIM = 300

embeddings_index = utils.load_embs_2_dict(
    'EMBEDDINGS/EN_DE_ES_HU_SK_SV_IT_PT.txt', dim=EMBEDDING_DIM)

embedding_matrix = utils.build_emb_matrix(num_embedding_vocab=vocab_size,
                                          embedding_dim=EMBEDDING_DIM,
                                          word_index=tokenizer.word_index,
                                          embeddings_index=embeddings_index)

global_en_mic_train = 0
global_en_mac_train = 0

global_de_mic_train = 0
global_de_mac_train = 0

global_es_mic_train = 0
global_es_mac_train = 0
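
# A hedged sketch of scoring the per-language test sets above; the trained
# `model` and the 0.5 decision threshold are assumptions, not shown here:
from sklearn.metrics import f1_score

test_sets = {'es': (x_test_es, y_test_es), 'hu': (x_test_hu, y_test_hu),
             'sk': (x_test_sk, y_test_sk), 'sv': (x_test_sv, y_test_sv),
             'it': (x_test_it, y_test_it), 'pt': (x_test_pt, y_test_pt)}
for lang, (x, y) in test_sets.items():
    preds = (model.predict(x) > 0.5).astype(int).ravel()
    print(lang, 'micro F1:', f1_score(y, preds, average='micro'),
          '| macro F1:', f1_score(y, preds, average='macro'))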
Example #3
x_train_de = de_train_data
y_train_de = de_train_labels
x_val_de = de_dev_data
y_val_de = de_dev_labels
x_test_de = de_test_data
y_test_de = de_test_labels

# tests
print(x_train[:3])
print(x_test[:3])

EMBEDDING_DIM = 200

# embeddings_index = utils.load_embs_2_dict('EMBEDDINGS/EN_DE.txt.w2v')
# embeddings_index = utils.load_embs_2_dict('EMBEDDINGS/EN_ES.txt.w2v')
# embeddings_index = utils.load_embs_2_dict('EMBEDDINGS/EN_DE_HU_SK_SV.txt', dim=300)
embeddings_index = utils.load_embs_2_dict(
    'EMBEDDINGS/glove.twitter.27B.200d.txt', dim=EMBEDDING_DIM)

embedding_matrix = utils.build_emb_matrix(num_embedding_vocab=vocab_size,
                                          embedding_dim=EMBEDDING_DIM,
                                          word_index=tokenizer.word_index,
                                          embeddings_index=embeddings_index)

global_en_mic_train = 0
global_de_mic_train = 0
global_en_mac_train = 0
global_de_mac_train = 0
global_en_mic_tune = 0
global_de_mic_tune = 0
global_en_mac_tune = 0
global_de_mac_tune = 0
num_iterations = 8
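
# The eight global_* variables above read as running sums: each of the
# num_iterations runs adds its micro-/macro-averaged F1 on the English (en)
# and German (de) test sets, once for the initially trained model ("train")
# and once after fine-tuning ("tune"). Inside the loop one would expect
# updates like this (the prediction step and 0.5 threshold are assumptions):
from sklearn.metrics import f1_score

en_preds = (model.predict(x_test) > 0.5).astype(int).ravel()
global_en_mic_train += f1_score(y_test, en_preds, average='micro')
global_en_mac_train += f1_score(y_test, en_preds, average='macro')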
Example #4
y_train = np.concatenate((train_labels, hu_labels, sk_labels, sv_labels))
x_val = dev_data
y_val = dev_labels
x_test = test_data
y_test = test_labels

x_train_de = de_train_data
y_train_de = de_train_labels
x_val_de = de_dev_data
y_val_de = de_dev_labels
x_test_de = de_test_data
y_test_de = de_test_labels

EMBEDDING_DIM = 300

embeddings_index = utils.load_embs_2_dict('EMBEDDINGS/EN_DE_HU_SK_SV.txt', dim=300)
embedding_matrix = utils.build_emb_matrix(num_embedding_vocab=vocab_size,
                                          embedding_dim=EMBEDDING_DIM,
                                          word_index=tokenizer.word_index,
                                          embeddings_index=embeddings_index)

global_en_mic_train = 0
global_de_mic_train = 0
global_en_mac_train = 0
global_de_mac_train = 0
global_en_mic_tune = 0
global_de_mic_tune = 0
global_en_mac_tune = 0
global_de_mac_tune = 0
num_iterations = 10

for i in range(num_iterations):
    print('training iteration:', i + 1)
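
    # The loop body is truncated here. A hypothetical continuation, assuming a
    # Keras setup: rebuild a small classifier around the frozen pretrained
    # embedding matrix, train it, and keep the model for evaluation. The
    # BiLSTM layer, its size, and the epoch count are assumptions, not the
    # source's actual architecture.
    from tensorflow.keras.initializers import Constant
    from tensorflow.keras.layers import Bidirectional, Dense, Embedding, LSTM
    from tensorflow.keras.models import Sequential

    model = Sequential([
        Embedding(vocab_size, EMBEDDING_DIM,
                  embeddings_initializer=Constant(embedding_matrix),
                  trainable=False),
        Bidirectional(LSTM(64)),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(x_train, y_train, validation_data=(x_val, y_val),
              epochs=3, batch_size=32)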
Example #5
y_train = np.concatenate((train_labels, it_labels, pt_labels))
x_val = dev_data
y_val = dev_labels
x_test = test_data
y_test = test_labels

x_train_de = de_train_data
y_train_de = de_train_labels
x_val_de = de_dev_data
y_val_de = de_dev_labels
x_test_de = de_test_data
y_test_de = de_test_labels

EMBEDDING_DIM = 300

embeddings_index = utils.load_embs_2_dict('EMBEDDINGS/EN_ES_IT_PT.txt',
                                          dim=300)
embedding_matrix = utils.build_emb_matrix(num_embedding_vocab=vocab_size,
                                          embedding_dim=EMBEDDING_DIM,
                                          word_index=tokenizer.word_index,
                                          embeddings_index=embeddings_index)

global_en_mic_train = 0
global_de_mic_train = 0
global_en_mac_train = 0
global_de_mac_train = 0
global_en_mic_tune = 0
global_de_mic_tune = 0
global_en_mac_tune = 0
global_de_mac_tune = 0
num_iterations = 10
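
# A plausible epilogue once the iterations finish (not part of the snippet):
# divide each running sum by num_iterations to report the average F1 scores.
print('EN micro/macro F1 (train):', global_en_mic_train / num_iterations,
      global_en_mac_train / num_iterations)
print('DE micro/macro F1 (tune): ', global_de_mic_tune / num_iterations,
      global_de_mac_tune / num_iterations)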