Exemplo n.º 1
0
def main():
    """Run the RNN pipeline end to end.

    Steps, in order:
      1. Fetch the current configuration and print it.
      2. Build a ``Preprocessing`` helper and obtain the train/test inputs.
      3. Unless ``config.mode`` is ``"infer"``, fit and train word embeddings
         (GloVe when ``config.emb_model == "glove"``, Word2Vec otherwise).
      4. Construct ``RNN_Model`` with the prepared data.
    """
    # Configuration: load it, then echo the parameters.
    cfg = get_configuration()
    print_configuration()

    # Preprocessing: prepare_data() is expected to yield exactly five items.
    preprocessor = Preprocessing(config=cfg)
    (train_input,
     train_length_input,
     train_labels,
     test_input,
     test_length_input) = preprocessor.prepare_data()

    # Embeddings are only fitted/trained outside of inference mode.
    if cfg.mode != "infer":
        embedding_cls = GloVeModel if cfg.emb_model == "glove" else Word2VecModel
        embeddings = embedding_cls(config=cfg,
                                   dict_vocab=preprocessor.dict_vocab)
        embeddings.fit_to_corpus()
        embeddings.train()

    # The model object is not kept; presumably its constructor drives
    # training/inference itself -- TODO confirm against RNN_Model.
    RNN_Model(
        cfg,
        preprocessor.dict_vocab_reverse,
        train_input,
        train_length_input,
        train_labels,
        test_input,
        test_length_input,
    )
Exemplo n.º 2
0
def main():
    """Run the seq2seq pipeline end to end.

    Loads and prints the configuration, prepares encoder/decoder inputs for
    the train and test sets, then constructs ``Seq2seq`` with that data and
    the reverse vocabulary. ``mode`` is passed explicitly as ``None``.
    """
    # Configuration: load it, then echo the parameters.
    cfg = get_configuration()
    print_configuration()

    # Preprocessing: prepare_data() is expected to yield exactly four items.
    preprocessor = Preprocessing(config=cfg)
    (train_input_encoder,
     train_input_decoder,
     test_input_encoder,
     test_input_decoder) = preprocessor.prepare_data()

    # The model object is not kept; presumably the constructor decides between
    # training and inference on its own -- TODO confirm against Seq2seq.
    Seq2seq(
        cfg,
        train_input_encoder,
        train_input_decoder,
        test_input_encoder,
        test_input_decoder,
        preprocessor.dict_vocab_reverse,
        mode=None,
    )
Exemplo n.º 3
0
from utils import generate_batches
from utils import Preprocessing
from configuration import get_configuration
from configuration import print_configuration
from models.basic_model import BasicModel
from models.seq2seq_model import Seq2SeqModel
from models.seq2seq_model import Seq2SeqModelAttention

# Script: build the attention seq2seq model and run preprocessing for three
# corpora (the default "triples" data, Cornell, and Twitter).
#
# Load configuration and create model
config = get_configuration()
# NOTE(review): `model` is not used again in the visible part of this script.
model = Seq2SeqModelAttention(config)

# Prepare vocabulary and triples data
# Uses Preprocessing's default paths; the vocabulary is created from
# "Training_Shuffled_Dataset.txt" before the data is prepared.
preprocess = Preprocessing(config=config)
preprocess.create_vocabulary("Training_Shuffled_Dataset.txt")
preprocess.prepare_data()
# Preprocess Cornell data
# A fresh Preprocessing instance is created (rebinding `preprocess`) and
# create_vocabulary() is not called on it -- presumably it reuses the
# vocabulary produced above; verify against Preprocessing.
# The test side points at the same validation file / "input_test_triples"
# target as the Twitter run below.
preprocess = Preprocessing(train_path_file="cornell_dataset.txt",
                           test_path_file="Validation_Shuffled_Dataset.txt",
                           train_path_file_target="input_train_cornell",
                           test_path_file_target="input_test_triples",
                           triples=False,
                           config=config)
preprocess.prepare_data()
# Preprocess Twitter data
# Same arguments as the Cornell run except for the training source and target.
# NOTE(review): no prepare_data() call follows this constructor in the visible
# excerpt -- the snippet may be truncated here; confirm it is called.
preprocess = Preprocessing(train_path_file="twitter_dataset.txt",
                           test_path_file="Validation_Shuffled_Dataset.txt",
                           train_path_file_target="input_train_twitter",
                           test_path_file_target="input_test_triples",
                           triples=False,
                           config=config)