def sample(model, song, config):
    """
    Generate a song by repeatedly predicting the next character, starting from
    the seed characters in `song`, until the end-of-song marker '%' is produced
    or config["LIMIT_LEN"] characters have been generated.

    :param config: Dict of settings (uses "LIMIT_LEN")
    :param model: nn.Module
    :param song: String, seed characters to start generation from
    :return: String (new generated song)
    """
    char_to_idx, idx_to_char = char_mapping()
    model.eval()

    i = 0
    while song[-1] != '%' and i < config["LIMIT_LEN"]:
        char = predict(model, song, config, char_to_idx, idx_to_char)
        song += char
        i += 1

    return song
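# A minimal sketch of the predict() helper called above, assuming the model maps
# an encoded character sequence to next-character logits and that
# config["TEMPERATURE"] scales them before sampling; the repo's actual signature
# and tensor shapes may differ.
import torch


def predict(model, song, config, char_to_idx, idx_to_char):
    # Encode the current song as a (1, seq_len) tensor of character indices
    encoded = torch.tensor([char_to_idx[c] for c in song]).unsqueeze(0)
    with torch.no_grad():
        logits = model(encoded)[0, -1]  # logits for the next character
    # Temperature-scaled sampling: higher temperature -> more random output
    probs = torch.softmax(logits / config["TEMPERATURE"], dim=-1)
    idx = torch.multinomial(probs, num_samples=1).item()
    return idx_to_char[idx]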
print('Number of development samples: ' + str(len(dev_set)))
print("Number of nodes: " + str(len_node) + ", while max allowed is " + str(options.max_node_num))
print("Number of parent nodes: " + str(len_in_node) + ", truncated to " + str(options.max_in_node_num))
print("Number of child nodes: " + str(len_out_node) + ", truncated to " + str(options.max_out_node_num))
print("Entity size: " + str(entity_size) + ", truncated to " + str(options.max_entity_size))

# Build dictionaries and mappings for words, characters and edges
words, chars, edges = collect_data(train_set)
print('Number of words: ' + str(len(words)))
print('Number of characters: ' + str(len(chars)))
print('Number of edges: ' + str(len(edges)))

dict_word, word_to_id, id_to_word = word_mapping(words)
dict_char, char_to_id, id_to_char = char_mapping(chars)
dict_edge, edge_to_id, id_to_edge = edge_mapping(edges)

options.word_to_id = word_to_id
options.char_to_id = char_to_id
options.edge_to_id = edge_to_id

# Binary setup keeps two relation classes; otherwise all six are used
if options.binary_classification:
    options.relation_num = 2
else:
    options.relation_num = 6

train_set = get_dataset_from_instances(train_set, word_to_id, char_to_id, edge_to_id, options)
dev_set = get_dataset_from_instances(dev_set, word_to_id, char_to_id, edge_to_id, options)

# Build dataloaders for the training set and development set
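# A possible continuation of the dataloader step announced above, assuming
# train_set and dev_set are now torch Dataset objects and that options carries
# a batch_size field; both are assumptions, not confirmed by this snippet.
from torch.utils.data import DataLoader

train_loader = DataLoader(train_set, batch_size=options.batch_size, shuffle=True)
dev_loader = DataLoader(dev_set, batch_size=options.batch_size, shuffle=False)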
import torch
from torch.nn import CrossEntropyLoss
from datetime import datetime

from models import LSTMSimple, VanillaRNN
from utils import read_songs_from, char_mapping, encode_songs, get_device, negative_log_likelihood
from train import fit
from plotting import save_loss_graph


def load_data(file):
    songs = read_songs_from('data/' + file)
    songs_encoded = encode_songs(songs, char_to_idx)
    return songs, songs_encoded


char_to_idx, idx_to_char = char_mapping()

train, train_encoded = load_data('train.txt')
val, val_encoded = load_data('val.txt')
test, test_encoded = load_data('test.txt')

config = {
    "EPOCHS": 15,
    "CHUNK_SIZE": 100,
    "VOCAB_SIZE": len(char_to_idx),
    "LR": 0.001,        # Default in Adam: 0.001
    "WEIGHT_DECAY": 0,  # Default in Adam: 0
    "HIDDEN": 100,
    # For song sampling
    "TEMPERATURE": 1,
}
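# A hedged sketch of how this setup is typically wired together. The constructor
# arguments for LSTMSimple and the fit() signature below are assumptions based
# on the names imported above, not the repo's confirmed API.
device = get_device()
model = LSTMSimple(config["VOCAB_SIZE"], config["HIDDEN"], config["VOCAB_SIZE"]).to(device)

criterion = CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config["LR"],
                             weight_decay=config["WEIGHT_DECAY"])

# fit() is assumed to run the training loop and return per-epoch losses:
# losses = fit(model, train_encoded, val_encoded, criterion, optimizer, config)
# save_loss_graph(losses)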
    # Mapping of words, sorted by decreasing frequency
    dico_words_train = word_mapping(train_sentences, parameters["lower"])[0]
    dico_words, word_to_id, id_to_word = augment_with_pretrained(
        dico_words_train.copy(),
        wordmodel,
        list(itertools.chain.from_iterable(
            [[w[0] for w in s] for s in dt_sentences]
        )) if not parameters['all_emb'] else None
    )
else:
    dico_words, word_to_id, id_to_word = word_mapping(
        train_sentences, parameters["lower"])
    dico_words_train = dico_words

# Create dictionaries and mappings for characters / tags / POS tags
dico_chars, char_to_id, id_to_char = char_mapping(train_sentences)
dico_tags, tag_to_id, id_to_tag = tag_mapping(train_sentences)
dico_pts, pt_to_id, id_to_pt = pt_mapping(train_sentences + dev_sentences)

if not os.path.exists(os.path.join(models_path, model_name)):
    os.makedirs(os.path.join(models_path, model_name))
    save_mappings(os.path.join(models_path, model_name, 'mappings.pkl'),
                  word_to_id, char_to_id, tag_to_id, pt_to_id, dico_words, id_to_tag)
else:
    word_to_id, char_to_id, tag_to_id, pt_to_id, dico_words, id_to_tag = reload_mappings(
        os.path.join(models_path, model_name, 'mappings.pkl'))
    dico_words_train = dico_words
    id_to_word = {v: k for k, v in word_to_id.items()}

# Index sentences
m3 = 0
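# A minimal sketch of what "index sentences" usually means at this point: each
# token is replaced by its word and character IDs from the mappings built above.
# The '<UNK>' fallback key and the lower() handling are assumptions about this
# repo's dictionaries, not confirmed behavior.
def index_sentence(sentence, word_to_id, char_to_id, lower=False):
    words, chars = [], []
    for token in sentence:
        w = token[0].lower() if lower else token[0]
        # Unknown words fall back to the assumed '<UNK>' entry (ID 0 otherwise)
        words.append(word_to_id.get(w, word_to_id.get('<UNK>', 0)))
        # Characters missing from the mapping are simply skipped
        chars.append([char_to_id[c] for c in token[0] if c in char_to_id])
    return words, chars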