else:
    lstmattn = AttentionLSTM(embedding_dim, hidden_dim, num_layers, output_size, dropout)
print(lstmattn)
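
# For reference, a minimal sketch of what an attention-over-LSTM classifier with this
# constructor signature could look like. This is an illustrative assumption, not the
# repo's actual AttentionLSTM class (defined elsewhere): a learned score weights each
# LSTM output, and the attention-weighted sum is fed to a linear classification layer.
import torch
import torch.nn as nn

class AttentionLSTMSketch(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, num_layers, output_size, dropout):
        super().__init__()
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True,
                            dropout=dropout if num_layers > 1 else 0.0)
        self.attn = nn.Linear(hidden_dim, 1)          # one attention score per time step
        self.fc = nn.Linear(hidden_dim, output_size)  # classification layer

    def forward(self, x):                             # x: (batch, seq_len, embedding_dim)
        outputs, _ = self.lstm(x)                     # (batch, seq_len, hidden_dim)
        weights = torch.softmax(self.attn(outputs), dim=1)
        context = (weights * outputs).sum(dim=1)      # attention-weighted sum over time
        return self.fc(context)                       # raw logits for CrossEntropyLoss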

tokenizer = ''
# if load_model is True, then load the pre-trained model
if load_model:
    lstmattn.load_state_dict(torch.load(load_model_path))
    with open('./output/lstmattn/lstmattn_tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)
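    # NOTE: reloading the pickled tokenizer keeps the word-to-index mapping consistent
    # with the run that produced the saved weights; data_ready is then expected to
    # reuse it rather than fit a new one.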


# to get training data and test data
X_train, y_train, X_test, y_test, tokenizer = data_prepare.data_ready(
    dataset, labelset, data_size, vocabulary_size, sequence_length, train_size,
    load_model, tokenizer)
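
# A rough, hypothetical sketch of what data_prepare.data_ready presumably does (the
# real helper lives in data_prepare.py); it assumes a Keras-style Tokenizer and
# pad_sequences: fit or reuse the tokenizer, convert the texts to padded index
# sequences, then split into train and test portions.
def data_ready_sketch(texts, labels, vocabulary_size, sequence_length, train_size,
                      tokenizer=None):
    import numpy as np
    from tensorflow.keras.preprocessing.text import Tokenizer
    from tensorflow.keras.preprocessing.sequence import pad_sequences
    if not tokenizer:                      # fit a new tokenizer only when none is supplied
        tokenizer = Tokenizer(num_words=vocabulary_size)
        tokenizer.fit_on_texts(texts)
    X = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=sequence_length)
    y = np.asarray(labels)
    return X[:train_size], y[:train_size], X[train_size:], y[train_size:], tokenizer
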
# using google news w2v as word embedding model
embedding_matrix = data_prepare.load_w2v(
    w2v_file, w2v_bi, vocabulary_size, embedding_dim, tokenizer)
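
# Hedged sketch of what data_prepare.load_w2v presumably does: look up each word in the
# tokenizer's vocabulary in the GoogleNews word2vec file and copy its vector into an
# index-aligned matrix (assumes gensim and a tokenizer exposing word_index; rows for
# out-of-vocabulary words stay zero). Illustrative only, not the repo's actual code.
def load_w2v_sketch(w2v_file, binary, vocabulary_size, embedding_dim, tokenizer):
    import numpy as np
    from gensim.models import KeyedVectors
    w2v = KeyedVectors.load_word2vec_format(w2v_file, binary=binary)
    matrix = np.zeros((vocabulary_size, embedding_dim), dtype="float32")
    for word, idx in tokenizer.word_index.items():
        if idx < vocabulary_size and word in w2v:
            matrix[idx] = w2v[word]
    return matrix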

with open('./output/lstmattn/lstmattn_tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)  # save the fitted tokenizer

optimizer = torch.optim.Adam(lstmattn.parameters(), lr=lr)   # define an optimizer for backpropagation
loss_func = nn.CrossEntropyLoss()   # define the loss function
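# Note: nn.CrossEntropyLoss expects raw, unnormalised logits of shape (batch, num_classes)
# and integer class labels of shape (batch,); the model should not apply softmax itself,
# since the loss applies log-softmax internally.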
start = time()

training_loss, training_acc, test_acc = train.train_with_w2v(
    num_epoch, train_size, batch_size, optimizer, X_train, y_train,
    sequence_length, embedding_dim, embedding_matrix, lstmattn, test_size,
    loss_func, X_test, y_test, "lstmattn", batch_first, cuda_gpu)

end = time()
print("time",end - start)

with open('./output/lstmattn/train_loss_lstmattn.json', 'w') as outfile1:
    json.dump(training_loss, outfile1)  # write training loss results to a json file
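
# The saved curve can be reloaded and plotted later, e.g. (assuming matplotlib is available):
# with open('./output/lstmattn/train_loss_lstmattn.json') as f:
#     losses = json.load(f)
# import matplotlib.pyplot as plt
# plt.plot(losses); plt.xlabel("step"); plt.ylabel("training loss"); plt.show()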

# Example 2
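
# As with the first example, a hypothetical sketch of the kind of model the `cnn`
# variable might hold (the real class is constructed earlier in the script, outside this
# excerpt): parallel 1-D convolutions over the embedded sequence, max-pooled over time,
# concatenated, and classified. num_filters and kernel_sizes are assumed parameter names.
import torch
import torch.nn as nn

class TextCNNSketch(nn.Module):
    def __init__(self, embedding_dim, num_filters, kernel_sizes, output_size, dropout):
        super().__init__()
        self.convs = nn.ModuleList(
            [nn.Conv1d(embedding_dim, num_filters, k) for k in kernel_sizes])
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(num_filters * len(kernel_sizes), output_size)

    def forward(self, x):                  # x: (batch, seq_len, embedding_dim)
        x = x.transpose(1, 2)              # Conv1d expects (batch, channels, seq_len)
        pooled = [conv(x).relu().max(dim=2).values for conv in self.convs]
        return self.fc(self.dropout(torch.cat(pooled, dim=1)))
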
tokenizer = ''

# if load_model is True, then load the pre-trained model
if load_model:
    cnn.load_state_dict(torch.load(load_model_path))
    with open('./output/cnn/cnn_tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)

# to get training data and test data
X_train, y_train, X_test, y_test, tokenizer = data_prepare.data_ready(
    dataset, labelset, data_size, vocabulary_size, sequence_length, train_size,
    load_model, tokenizer)

# using google news w2v as word embedding model
embedding_matrix = data_prepare.load_w2v(
    w2v_file, w2v_bi, vocabulary_size, embedding_dim,
    tokenizer)  # use the google w2v vector as the embedding layer

with open('./output/cnn/cnn_tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle,
                protocol=pickle.HIGHEST_PROTOCOL)  # save the fitted tokenizer

optimizer = torch.optim.Adam(cnn.parameters(),
                             lr=lr)  # define an optimizer for backpropagation
loss_func = nn.CrossEntropyLoss()  # define the loss function

start = time()

# get the training loss and test accuracy using the train_with_w2v function from the train.py module
training_loss, training_acc, test_acc = train.train_with_w2v(
    num_epoch, train_size, batch_size, optimizer, X_train, y_train,