Example #1
def main(_):
    data = reader.build_data(data_dir)
    train_data = data['train_data']
    valid_data = data['valid_data']
    word_embedding = data['word_embedding']

    train_data_producer = reader.DataProducer(train_data)
    valid_data_producer = reader.DataProducer(valid_data, False)

    # BiGRU2Layers: two-layer bidirectional GRU classifier
    graph = gru.GRU(vocab_size=len(word_embedding),
                    class_size=1,
                    word_vectors=word_embedding)
    graph.train(train_data_producer, valid_data_producer, 10)
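The `main(_)` signature above follows the TensorFlow 1.x `tf.app.run()` entry-point convention; `reader`, `gru`, and `data_dir` come from the surrounding project and are not shown here. A minimal, self-contained sketch of that launcher boilerplate, where the `data_dir` flag name and its default are assumptions:

import tensorflow as tf  # TensorFlow 1.x style entry point

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('data_dir', './data',
                           'Directory with the prepared dataset (flag name assumed).')


def main(_):
    # Stand-in for the main(_) shown above; it would read data_dir from FLAGS
    # and run reader.build_data / graph.train as in the example.
    print('data_dir = %s' % FLAGS.data_dir)


if __name__ == '__main__':
    tf.app.run()  # parses the flags, then calls main() with the remaining argv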
Example #2
def main(_):
    # Data Producer
    data = reader.build_data(data_dir)
    test_data = data['test_data']
    word_embedding = data['word_embedding']
    data_producer = reader.DataProducer(test_data, False)

    # GRU graph in inference mode (training=False); evaluated against the checkpoint at model_path
    model_path = '../save_models/gru_6328'
    graph = gru.GRU(vocab_size=len(word_embedding),
                    class_size=1,
                    word_vectors=word_embedding,
                    batch=100,
                    training=False)
    print(graph.evaluate(data_producer, model_path))
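Presumably `graph.evaluate(data_producer, model_path)` restores the checkpoint at `model_path` and returns a single metric; under that assumption, a short sketch of ranking several saved checkpoints (the second checkpoint name is made up):

# Hypothetical: rank saved checkpoints by their evaluation score.
checkpoints = ['../save_models/gru_6328', '../save_models/gru_7000']  # second path is illustrative
scores = {path: graph.evaluate(data_producer, path) for path in checkpoints}
best = max(scores, key=scores.get)
print('best checkpoint: %s (score: %s)' % (best, scores[best]))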
Example #3
def create_model(
        batch_first,
        bptt,
        model_type,
        model_cfg,
        device,
        input_size,
        output_size,
        hidden_size,
        default_model_params):
    """Wrapper for creating models with default hyperparameters."""
    # Consider putting these in an immutable-across-experiments class
    for cfg in default_model_params.values():
        cfg['input_size'] = input_size
        cfg['output_size'] = output_size
        cfg['hidden_size'] = hidden_size
        cfg['batch_first'] = batch_first
        cfg['bptt'] = bptt
    dct = default_model_params[model_cfg]
    if model_type.lower() == "transformer":
        model = transformer.TransformerModel(**dct)
    elif model_type.lower() == "lstm":
        model = lstm.LSTM(**dct)
    elif model_type.lower() == "gru":
        model = gru.GRU(**dct)
    elif model_type.lower() == "grup":
        model = grup.GRUP(**dct)
    elif model_type.lower() == "grup_control":
        model = grup_control.GRUP_CONTROL(**dct)
    elif model_type.lower() == "ff":
        model = ff.FF(**dct)
    elif model_type.lower() == "linear":
        model = linear.linear(**dct)
    elif model_type.lower() == "ds":
        model = ds.DS(**dct)
    else:
        raise NotImplementedError(model_type)
    return model.to(device)
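A sketch of how `create_model` might be invoked. The contents of `default_model_params`, the config names, and the sizes below are assumptions made up for illustration; the selected model module (here `gru`) must accept the resulting keyword arguments:

import torch

# Hypothetical defaults: one named configuration per model size. create_model()
# injects input/output/hidden sizes, batch_first and bptt before instantiation.
default_model_params = {
    'small': {'nlayers': 2, 'dropout': 0.1},
    'large': {'nlayers': 4, 'dropout': 0.3},
}

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = create_model(batch_first=True,
                     bptt=35,
                     model_type='gru',
                     model_cfg='small',
                     device=device,
                     input_size=128,
                     output_size=128,
                     hidden_size=256,
                     default_model_params=default_model_params)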
Example #4
def generate_model_and_transformers(params, class_dict):
    """
    Pick and construct the model and the init and drop transformers given the params, the init transformer
    makes it so that the data in the PytorchDataset is in the tensors of shape and sizes needed, the drop transformer
    randomly drops tokens at run time when a sample is returned from the dataset, to simulate unknown words.
    Also deals with selecting the right device and putting the model on that device, GPU is preferred if available.
    :return: model, data transformer at dataset initialization, data transformer at run time
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    w2v_vocab, w2v_weights = w2v_matrix_vocab_generator(params["w2v"])
    c2v_vocab = None
    c2v_weights = None

    if params["c2v"] is not None:
        c2v_vocab, c2v_weights = w2v_matrix_vocab_generator(params["c2v"])

    init_data_transform = data_manager.InitTransform(w2v_vocab, class_dict,
                                                     c2v_vocab)
    drop_data_transform = data_manager.DropTransform(0.001, w2v_vocab["<UNK>"],
                                                     w2v_vocab["<padding>"])

    # needed by some models, given their architecture (e.g. CONV)
    padded_sentence_length = 50
    # needed by models when using c2v embeddings
    padded_word_length = 30
    if params["model"] == "lstm":
        model = lstm.LSTM(device, w2v_weights, params["hidden_size"],
                          len(class_dict), params["drop"],
                          params["bidirectional"], not params["unfreeze"],
                          params["embedding_norm"], c2v_weights,
                          padded_word_length)
    elif params["model"] == "gru":
        model = gru.GRU(device, w2v_weights, params["hidden_size"],
                        len(class_dict), params["drop"],
                        params["bidirectional"], not params["unfreeze"],
                        params["embedding_norm"], c2v_weights,
                        padded_word_length)
    elif params["model"] == "rnn":
        model = rnn.RNN(device, w2v_weights, params["hidden_size"],
                        len(class_dict), params["drop"],
                        params["bidirectional"], not params["unfreeze"],
                        params["embedding_norm"], c2v_weights,
                        padded_word_length)
    elif params["model"] == "lstm2ch":
        model = lstm2ch.LSTM2CH(device, w2v_weights, params["hidden_size"],
                                len(class_dict), params["drop"],
                                params["bidirectional"],
                                params["embedding_norm"])
    elif params["model"] == "encoder":
        tag_embedding_size = 20
        model = encoder.EncoderDecoderRNN(
            device, w2v_weights, tag_embedding_size, params["hidden_size"],
            len(class_dict), params["drop"], params["bidirectional"],
            not params["unfreeze"], params["embedding_norm"],
            params["embedding_norm"])
    elif params["model"] == "attention":
        tag_embedding_size = 20
        model = attention.Attention(
            device,
            w2v_weights,
            tag_embedding_size,
            params["hidden_size"],
            len(class_dict),
            params["drop"],
            params["bidirectional"],
            not params["unfreeze"],
            params["embedding_norm"],
            params["embedding_norm"],
            padded_sentence_length=padded_sentence_length)
    elif params["model"] == "conv":
        model = conv.CONV(device, w2v_weights, params["hidden_size"],
                          len(class_dict), padded_sentence_length,
                          params["drop"], params["bidirectional"],
                          not params["unfreeze"], params["embedding_norm"])
    elif params["model"] == "fcinit":
        model = fcinit.FCINIT(device, w2v_weights, params["hidden_size"],
                              len(class_dict), padded_sentence_length,
                              params["drop"], params["bidirectional"],
                              not params["unfreeze"], params["embedding_norm"])
    elif params["model"] == "lstmcrf":
        model = lstmcrf.LstmCrf(device, w2v_weights, class_dict,
                                params["hidden_size"], params["drop"],
                                params["bidirectional"],
                                not params["unfreeze"],
                                params["embedding_norm"], c2v_weights,
                                padded_word_length)
    else:
        raise ValueError("unknown model type: %s" % params["model"])

    model = model.to(device)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    n_trainable = sum(np.prod(p.size()) for p in model_parameters)
    print("total trainable parameters %i" % n_trainable)
    return model, init_data_transform, drop_data_transform
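The two returned transformers are typically wired into the dataset as follows: the init transform runs once when samples are loaded, the drop transform every time a sample is fetched. The wrapper below is a hypothetical stand-in for the project's own PytorchDataset, assuming both transforms are callables:

from torch.utils.data import Dataset, DataLoader


class TransformedDataset(Dataset):
    """Hypothetical stand-in for the project's PytorchDataset."""

    def __init__(self, samples, init_transform, runtime_transform):
        # the init transform is applied once, at dataset construction time
        self.samples = [init_transform(s) for s in samples]
        self.runtime_transform = runtime_transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        # the runtime (drop) transform is applied every time a sample is returned
        return self.runtime_transform(self.samples[idx])


# model, init_transform, drop_transform = generate_model_and_transformers(params, class_dict)
# train_set = TransformedDataset(raw_train_samples, init_transform, drop_transform)
# loader = DataLoader(train_set, batch_size=32, shuffle=True)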