Example #1
def sample(model, song, config):
    """
    # This function takes the desired output length and input characters as arguments, returning the produced sentence
    :param config: Dict of settings
    :param model: nn.Module
    :param song: String
    :param limit: Int
    :return: String (new generated song)
    """
    char_to_idx, idx_to_char = char_mapping()
    model.eval()

    i = 0
    while song[-1] != '%' and i < config["LIMIT_LEN"]:
        char = predict(model, song, config, char_to_idx, idx_to_char)
        song += char
        i += 1

    return song
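The predict helper called above is not defined in any of these examples. Below is a minimal sketch of what it might look like, assuming the model maps an encoded prefix to per-character logits and that config["TEMPERATURE"] (seen in Example #3) scales the sampling distribution; every name and shape here is an assumption, not the author's code:

import torch

def predict(model, song, config, char_to_idx, idx_to_char):
    # Encode the prefix, run the model, and sample the next character
    # from a temperature-scaled softmax over the vocabulary.
    encoded = torch.tensor([char_to_idx[c] for c in song]).unsqueeze(0)
    with torch.no_grad():
        logits = model(encoded)[0, -1]  # assumed output shape: (batch, seq_len, vocab)
    probs = torch.softmax(logits / config["TEMPERATURE"], dim=-1)
    return idx_to_char[torch.multinomial(probs, num_samples=1).item()]

A temperature below 1 sharpens the distribution toward the most likely character; a temperature above 1 flattens it and produces more varied songs.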
Example #2
    print('Number of development samples: ' + str(len(dev_set)))

    print("Number of nodes: " + str(len_node) + ", while max allowed is " + str(options.max_node_num))
    print("Number of parent nodes: " + str(len_in_node) + ", truncated to " + str(options.max_in_node_num))
    print("Number of child nodes: " + str(len_out_node) + ", truncated to " + str(options.max_out_node_num))
    print("Entity size: " + str(entity_size) + ", truncated to " + str(options.max_entity_size))

    # Build dictionaries and mappings for words, characters, and edges

    words, chars, edges = collect_data(train_set)
    print('Number of words: ' + str(len(words)))
    print('Number of characters: ' + str(len(chars)))
    print('Number of edges: ' + str(len(edges)))

    dict_word, word_to_id, id_to_word = word_mapping(words)
    dict_char, char_to_id, id_to_char = char_mapping(chars)
    dict_edge, edge_to_id, id_to_edge = edge_mapping(edges)

    options.word_to_id = word_to_id
    options.char_to_id = char_to_id
    options.edge_to_id = edge_to_id

    # Binary task: relation present vs. absent; otherwise six relation classes.
    if options.binary_classification:
        options.relation_num = 2
    else:
        options.relation_num = 6

    train_set = get_dataset_from_instances(train_set, word_to_id, char_to_id, edge_to_id, options)
    dev_set = get_dataset_from_instances(dev_set, word_to_id, char_to_id, edge_to_id, options)

    # Build dataloaders for the training and development sets
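Example #2 stops at the dataloader comment. A minimal sketch of the likely continuation, assuming the two sets are PyTorch Dataset objects and that options carries a batch_size field (both assumptions):

from torch.utils.data import DataLoader

# Wrap both splits; shuffle only the training data.
train_loader = DataLoader(train_set, batch_size=options.batch_size, shuffle=True)
dev_loader = DataLoader(dev_set, batch_size=options.batch_size, shuffle=False)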
Example #3
import torch
from torch.nn import CrossEntropyLoss
from models import LSTMSimple, VanillaRNN
from utils import read_songs_from, char_mapping, encode_songs, get_device, negative_log_likelihood
from datetime import datetime
from train import fit
from plotting import save_loss_graph


def load_data(file):
    # Relies on the module-level char_to_idx built below by char_mapping().
    songs = read_songs_from('data/' + file)
    songs_encoded = encode_songs(songs, char_to_idx)
    return songs, songs_encoded


char_to_idx, idx_to_char = char_mapping()

train, train_encoded = load_data('train.txt')
val, val_encoded = load_data('val.txt')
test, test_encoded = load_data('test.txt')

config = {
    "EPOCHS": 15,
    "CHUNK_SIZE": 100,
    "VOCAB_SIZE": len(char_to_idx),
    "LR": 0.001,        # Adam's default
    "WEIGHT_DECAY": 0,  # Adam's default
    "HIDDEN": 100,

    # For song sampling
    "TEMPERATURE": 1,
}
Example #4
        # Word mapping, sorted by decreasing frequency
        dico_words_train = word_mapping(train_sentences,
                                        parameters["lower"])[0]
        dico_words, word_to_id, id_to_word = augment_with_pretrained(
            dico_words_train.copy(), wordmodel,
            list(
                itertools.chain.from_iterable([[w[0] for w in s]
                                               for s in dt_sentences]))
            if not parameters['all_emb'] else None)
    else:
        dico_words, word_to_id, id_to_word = word_mapping(
            train_sentences, parameters["lower"])
        dico_words_train = dico_words

    # Create dictionaries and mappings for characters / tags / POS tags
    dico_chars, char_to_id, id_to_char = char_mapping(train_sentences)
    dico_tags, tag_to_id, id_to_tag = tag_mapping(train_sentences)
    dico_pts, pt_to_id, id_to_pt = pt_mapping(train_sentences + dev_sentences)
    if not os.path.exists(os.path.join(models_path, model_name)):
        os.makedirs(os.path.join(models_path, model_name))
    save_mappings(os.path.join(models_path, model_name, 'mappings.pkl'),
                  word_to_id, char_to_id, tag_to_id, pt_to_id,
                  dico_words, id_to_tag)
else:
    word_to_id, char_to_id, tag_to_id, pt_to_id, dico_words, id_to_tag = reload_mappings(
        os.path.join(models_path, model_name, 'mappings.pkl'))
    dico_words_train = dico_words
    id_to_word = {v: k for k, v in word_to_id.items()}

# Index sentences
m3 = 0
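save_mappings and reload_mappings are only called in this example, never defined. A plausible sketch, assuming they pickle the lookup tables as a single tuple in the same order the call sites use (an assumption, not the repository's code):

import pickle

def save_mappings(path, word_to_id, char_to_id, tag_to_id, pt_to_id, dico_words, id_to_tag):
    # Persist every lookup table needed to re-index data on a later run.
    with open(path, 'wb') as f:
        pickle.dump((word_to_id, char_to_id, tag_to_id, pt_to_id, dico_words, id_to_tag), f)

def reload_mappings(path):
    # Returns the tuple in the order the caller unpacks it above.
    with open(path, 'rb') as f:
        return pickle.load(f)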