def main(_):
    data = reader.build_data(data_dir)
    train_data = data['train_data']
    valid_data = data['valid_data']
    word_embedding = data['word_embedding']
    train_data_producer = reader.DataProducer(train_data)
    valid_data_producer = reader.DataProducer(valid_data, False)

    # BiGRU2Layers
    graph = gru.GRU(vocab_size=len(word_embedding), class_size=1,
                    word_vectors=word_embedding)
    graph.train(train_data_producer, valid_data_producer, 10)
def main(_):
    # Data producer
    data = reader.build_data(data_dir)
    test_data = data['test_data']
    word_embedding = data['word_embedding']
    data_producer = reader.DataProducer(test_data, False)

    # GRU
    model_path = '../save_models/gru_6328'
    graph = gru.GRU(vocab_size=len(word_embedding), class_size=1,
                    word_vectors=word_embedding, batch=100, training=False)
    print(graph.evaluate(data_producer, model_path))
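# Entry-point sketch (an assumption, not part of the snippets above): the
# TF 1.x-style `main(_)` signature suggests both the training and evaluation
# scripts are launched through tf.app.run(); `reader`, `gru` and `data_dir`
# are assumed to be imported/defined at module level in the original files.
import tensorflow as tf

if __name__ == '__main__':
    tf.app.run()  # parses flags, then calls main() with the remaining argv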
def create_model(batch_first, bptt, model_type, model_cfg, device,
                 input_size, output_size, hidden_size, default_model_params):
    """Wrapper for creating models with default hyperparameters."""
    # Consider putting these in an immutable-across-experiments class
    for k, v in default_model_params.items():
        v['input_size'] = input_size
        v['output_size'] = output_size
        v['hidden_size'] = hidden_size
        v['batch_first'] = batch_first
        v['bptt'] = bptt
        default_model_params[k] = v

    dct = default_model_params[model_cfg]
    if model_type.lower() == "transformer":
        model = transformer.TransformerModel(**dct)
    elif model_type.lower() == "lstm":
        model = lstm.LSTM(**dct)
    elif model_type.lower() == "gru":
        model = gru.GRU(**dct)
    elif model_type.lower() == "grup":
        model = grup.GRUP(**dct)
    elif model_type.lower() == "grup_control":
        model = grup_control.GRUP_CONTROL(**dct)
    elif model_type.lower() == "ff":
        model = ff.FF(**dct)
    elif model_type.lower() == "linear":
        model = linear.linear(**dct)
    elif model_type.lower() == "ds":
        model = ds.DS(**dct)
    else:
        raise NotImplementedError(model_type)
    return model.to(device)
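# Usage sketch for create_model (the config names and per-model kwargs below
# are illustrative assumptions, not values from the original experiments):
# default_model_params maps a config name to the keyword arguments of the
# chosen model class, and create_model fills in the shared sizes itself.
import torch

default_model_params = {
    "small": {"num_layers": 2, "dropout": 0.1},  # assumed to match gru.GRU's kwargs
    "large": {"num_layers": 4, "dropout": 0.3},
}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = create_model(batch_first=True, bptt=35, model_type="gru", model_cfg="small",
                     device=device, input_size=300, output_size=10, hidden_size=256,
                     default_model_params=default_model_params)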
def generate_model_and_transformers(params, class_dict):
    """
    Pick and construct the model and the init and drop transformers given the params.

    The init transformer reshapes the data in the PytorchDataset into tensors of the
    shape and size the model needs; the drop transformer randomly drops tokens at run
    time when a sample is returned from the dataset, to simulate unknown words.
    Also selects the right device and puts the model on it (GPU is preferred if
    available).

    :return: model, data transformer at dataset initialization, data transformer at run time
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    w2v_vocab, w2v_weights = w2v_matrix_vocab_generator(params["w2v"])
    c2v_vocab = None
    c2v_weights = None
    if params["c2v"] is not None:
        c2v_vocab, c2v_weights = w2v_matrix_vocab_generator(params["c2v"])
    init_data_transform = data_manager.InitTransform(w2v_vocab, class_dict, c2v_vocab)
    drop_data_transform = data_manager.DropTransform(0.001, w2v_vocab["<UNK>"],
                                                     w2v_vocab["<padding>"])

    # needed for some models, given their architecture, e.g. CONV
    padded_sentence_length = 50
    # needed by models when using c2v embeddings
    padded_word_length = 30

    if params["model"] == "lstm":
        model = lstm.LSTM(device, w2v_weights, params["hidden_size"], len(class_dict),
                          params["drop"], params["bidirectional"], not params["unfreeze"],
                          params["embedding_norm"], c2v_weights, padded_word_length)
    elif params["model"] == "gru":
        model = gru.GRU(device, w2v_weights, params["hidden_size"], len(class_dict),
                        params["drop"], params["bidirectional"], not params["unfreeze"],
                        params["embedding_norm"], c2v_weights, padded_word_length)
    elif params["model"] == "rnn":
        model = rnn.RNN(device, w2v_weights, params["hidden_size"], len(class_dict),
                        params["drop"], params["bidirectional"], not params["unfreeze"],
                        params["embedding_norm"], c2v_weights, padded_word_length)
    elif params["model"] == "lstm2ch":
        model = lstm2ch.LSTM2CH(device, w2v_weights, params["hidden_size"], len(class_dict),
                                params["drop"], params["bidirectional"],
                                params["embedding_norm"])
    elif params["model"] == "encoder":
        tag_embedding_size = 20
        model = encoder.EncoderDecoderRNN(
            device, w2v_weights, tag_embedding_size, params["hidden_size"], len(class_dict),
            params["drop"], params["bidirectional"], not params["unfreeze"],
            params["embedding_norm"], params["embedding_norm"])
    elif params["model"] == "attention":
        tag_embedding_size = 20
        model = attention.Attention(
            device, w2v_weights, tag_embedding_size, params["hidden_size"], len(class_dict),
            params["drop"], params["bidirectional"], not params["unfreeze"],
            params["embedding_norm"], params["embedding_norm"],
            padded_sentence_length=padded_sentence_length)
    elif params["model"] == "conv":
        model = conv.CONV(device, w2v_weights, params["hidden_size"], len(class_dict),
                          padded_sentence_length, params["drop"], params["bidirectional"],
                          not params["unfreeze"], params["embedding_norm"])
    elif params["model"] == "fcinit":
        model = fcinit.FCINIT(device, w2v_weights, params["hidden_size"], len(class_dict),
                              padded_sentence_length, params["drop"], params["bidirectional"],
                              not params["unfreeze"], params["embedding_norm"])
    elif params["model"] == "lstmcrf":
        model = lstmcrf.LstmCrf(device, w2v_weights, class_dict, params["hidden_size"],
                                params["drop"], params["bidirectional"], not params["unfreeze"],
                                params["embedding_norm"], c2v_weights, padded_word_length)
    else:
        raise ValueError("unknown model type: %s" % params["model"])

    model = model.to(device)
    # count trainable parameters
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    n_params = sum([np.prod(p.size()) for p in model_parameters])
    print("total trainable parameters %i" % n_params)
    return model, init_data_transform, drop_data_transform
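# Usage sketch for generate_model_and_transformers (paths, label map and
# hyperparameters below are illustrative assumptions): the init transform is
# handed to the dataset at construction time, while the drop transform is
# applied each time a training sample is drawn.
params = {
    "w2v": "data/w2v_vectors.txt",  # hypothetical word-embedding file
    "c2v": None,                    # no character embeddings in this sketch
    "model": "lstm",
    "hidden_size": 200,
    "drop": 0.5,
    "bidirectional": True,
    "unfreeze": False,
    "embedding_norm": 10.0,
}
class_dict = {"O": 0, "B-dest": 1, "I-dest": 2}  # hypothetical label -> index map
model, init_transform, drop_transform = generate_model_and_transformers(params, class_dict)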