def main():
    """Train word embeddings (unless in inference mode), then fit the RNN model."""
    # Load run parameters and echo them for the log.
    config = get_configuration()
    print_configuration()

    # Build the vocabulary and split the corpus into train/test tensors.
    preprocess = Preprocessing(config=config)
    (train_input, train_length_input, train_labels,
     test_input, test_length_input) = preprocess.prepare_data()

    # Embeddings are only (re)trained outside inference mode.
    if config.mode != "infer":
        if config.emb_model == "glove":
            embeddings = GloVeModel(config=config, dict_vocab=preprocess.dict_vocab)
        else:
            embeddings = Word2VecModel(config=config, dict_vocab=preprocess.dict_vocab)
        embeddings.fit_to_corpus()
        embeddings.train()

    # Constructing the model runs training/evaluation as a side effect;
    # the instance itself is not kept.
    RNN_Model(config, preprocess.dict_vocab_reverse,
              train_input, train_length_input, train_labels,
              test_input, test_length_input)
def main():
    """Preprocess the dialogue corpus and launch the seq2seq model."""
    # Load run parameters and echo them for the log.
    config = get_configuration()
    print_configuration()

    # One preprocessing pass yields encoder/decoder sequences for both splits.
    preprocess = Preprocessing(config=config)
    (train_input_encoder, train_input_decoder,
     test_input_encoder, test_input_decoder) = preprocess.prepare_data()

    # NOTE(review): mode=None is passed despite the train/infer selection
    # mentioned here — presumably Seq2seq falls back to the config for the
    # mode; confirm against its constructor.
    Seq2seq(config, train_input_encoder, train_input_decoder,
            test_input_encoder, test_input_decoder,
            preprocess.dict_vocab_reverse, mode=None)
from utils import generate_batches
from utils import Preprocessing
from configuration import get_configuration
from configuration import print_configuration
from models.basic_model import BasicModel
from models.seq2seq_model import Seq2SeqModel
from models.seq2seq_model import Seq2SeqModelAttention

# Load configuration and create the attention-based seq2seq model.
config = get_configuration()
model = Seq2SeqModelAttention(config)

# Build the vocabulary from the training triples, then preprocess them.
preprocess = Preprocessing(config=config)
preprocess.create_vocabulary("Training_Shuffled_Dataset.txt")
preprocess.prepare_data()

# Preprocess the Cornell data against the shared validation split.
preprocess = Preprocessing(train_path_file="cornell_dataset.txt",
                           test_path_file="Validation_Shuffled_Dataset.txt",
                           train_path_file_target="input_train_cornell",
                           test_path_file_target="input_test_triples",
                           triples=False,
                           config=config)
preprocess.prepare_data()

# Preprocess the Twitter data the same way.
preprocess = Preprocessing(train_path_file="twitter_dataset.txt",
                           test_path_file="Validation_Shuffled_Dataset.txt",
                           train_path_file_target="input_train_twitter",
                           test_path_file_target="input_test_triples",
                           triples=False,
                           config=config)
# BUG FIX: the Twitter Preprocessing instance was constructed but
# prepare_data() was never invoked, so its target files were never
# written — mirror the Cornell branch above.
preprocess.prepare_data()