path_eval_sentences = os.path.join(args.data_dir, 'dev.pkl')

# Vocabulary: a lookup table built from the words file, with a single
# out-of-vocabulary bucket (num_oov_buckets=1).
words = tf.contrib.lookup.index_table_from_file(
    path_words,
    num_oov_buckets=1,
)

# Input pipeline: load the train and eval datasets from disk.
logging.info("Creating the datasets...")
train_sentences = load_dataset_from_text(path_train_sentences)
eval_sentences = load_dataset_from_text(path_eval_sentences)

# Extra parameters consumed by the dataset and the model.
params.eval_size = params.dev_size
# The shuffle buffer is sized to the training-set size.
params.buffer_size = params.train_size
# Id that the vocabulary table assigns to the padding word.
params.id_pad_word = words.lookup(tf.constant(params.pad_word))

# One iterator per dataset.
train_inputs = input_fn('train', train_sentences, words, params)
eval_inputs = input_fn('eval', eval_sentences, words, params)
logging.info("- done.")

# Two sets of graph nodes (train and eval); the eval model is built with
# reuse=True so that it shares weights with the train model.
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)
logging.info("- done.")

# Announce the training run.
logging.info(
    "Starting training for {} epoch(s)".format(params.num_epochs)
)
# Pretrained embeddings, read from a binary word2vec-format file.
logging.info("Loading pretrained Word2Vec...")
word2vec = gensim.models.KeyedVectors.load_word2vec_format(
    args.word2vec_file,
    binary=True,
)
logging.info("- done.")

logging.info("Building datasets...")

# One symbol per line of symbols.txt, surrounding whitespace stripped.
symbols_path = os.path.join(args.data_dir, "symbols.txt")
with open(symbols_path) as f_symbols:
    symbols = [line.strip() for line in f_symbols]

# Map each symbol to its loaded signal, for the train and dev splits.
# NOTE(review): only the first two symbols are used (symbols[:2]) --
# confirm this restriction is intentional and not a debugging leftover.
train_signal_map = {
    sym: load_signal(path_train, path_corpus, sym)
    for sym in symbols[:2]
}
dev_signal_map = {
    sym: load_signal(path_dev, path_corpus, sym)
    for sym in symbols[:2]
}

# Extra parameters consumed by the dataset and the model.
params.eval_size = params.dev_size
params.buffer_size = params.train_size

# One input pipeline per split.
train_inputs = input_fn("train", train_signal_map, word2vec, params)
eval_inputs = input_fn("eval", dev_signal_map, word2vec, params)
logging.info("- done.")

# The eval model is built with reuse=True on top of the train model.
logging.info("Creating the model...")
train_model_spec = model_fn("train", train_inputs, params)
eval_model_spec = model_fn("eval", eval_inputs, params, reuse=True)
logging.info("- done.")

# Run training with periodic evaluation.
logging.info(
    "Starting training for {} epoch(s)".format(params.num_epochs)
)
train_and_evaluate(
    train_model_spec,
    eval_model_spec,
    args.model_dir,
    params,
    args.restore_dir,
)