def main(_):
    # Load the pre-trained vocabulary and wrap the train/validation files
    # in DataProducers (the trailing False matches the validation producer).
    word_to_id, word_embedding = reader.load_vocabulary(word_embedding_path)
    train_data_producer = reader.DataProducer(word_to_id, train_path)
    valid_data_producer = reader.DataProducer(word_to_id, valid_path, False)
    # Train the BiGRU for 100 epochs.
    graph = bigru.BiGRU(len(word_embedding), word_embedding)
    graph.train(train_data_producer, valid_data_producer, 100)
def main(_):
    word_to_id, word_embedding = reader.load_vocabulary(word_embedding_path)
    test_data_producer = reader.DataProducer(word_to_id, test_path, False)
    model_path = '../save_models/bigru2layers_crf_50472'
    # Evaluate the saved CRF checkpoint one example at a time (batch=1).
    graph = bigru2layers_crf.BiGRU2LayersCRF(len(word_embedding), 2,
                                             word_embedding, batch=1)
    print(graph.evaluate(test_data_producer, model_path))
def main(_):
    id2tagsinwords_map = reader.buildID2TagsInWordsMap(relation_data_dir)
    word_to_id, word_embedding = reader.load_vocabulary(word_embedding_path)
    relation_embedding = reader.load_relation_embeddings(
        relation_embedding_path)
    train_data_producer = reader.DataProducer(id2tagsinwords_map, word_to_id,
                                              train_path, 1024)
    valid_data_producer = reader.DataProducer(id2tagsinwords_map, word_to_id,
                                              valid_path, 1024, False)
    graph = bigru2layers_dev.BiGRU2LayersDev(len(word_embedding),
                                             len(relation_embedding),
                                             word_embedding, relation_embedding)
    graph.train(train_data_producer, valid_data_producer, 100)
def main(_):
    id2tagsinwords_map = reader.buildID2TagsInWordsMap(relation_data_dir)
    word_to_id, word_embedding = reader.load_vocabulary(word_embedding_path)
    relation_embedding = reader.load_relation_embeddings(
        relation_embedding_path)
    test_data_producer = reader.DataProducer(id2tagsinwords_map, word_to_id,
                                             test_path, 1024, False)
    # BiGRU2LayersDev checkpoint; batch=1 for per-example evaluation.
    model_path = '../save_models/bigru2layers_dev_14421'
    graph = bigru2layers_dev.BiGRU2LayersDev(len(word_embedding),
                                             len(relation_embedding),
                                             word_embedding, relation_embedding,
                                             batch=1)
    print(graph.evaluate(test_data_producer, model_path))
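# Each main(_) entry point above follows the TensorFlow 1.x tf.app.run()
# convention; presumably each script ends with the standard guard (a minimal
# sketch, assuming the scripts are launched directly):

if __name__ == '__main__':
    tf.app.run()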
def __init__(self, graph_def_path, vocab_path):
    # Tensor names exported with the frozen graph; "import/" is the default
    # prefix that tf.import_graph_def adds to imported node names.
    prefix = "import/"
    self.input_word_ids = prefix + "Test/Model/input_word_ids:0"
    self.model_length = prefix + "Test/Model/seq_len:0"
    self.state_in = prefix + "Test/Model/Model/state_in:0"
    self.state_out = prefix + "Test/Model/Model/state_out:0"
    self.top_k = prefix + "Test/Model/top_k:0"
    self.model_probs = prefix + "Test/Model/Model/probabilities:0"
    self.model_top_k_result = prefix + "Test/Model/Model/top_k_prediction:1"
    # Deserialize the frozen GraphDef and import it into the default graph.
    with open(graph_def_path, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    tf.import_graph_def(graph_def)
    self.word_to_id, self.id_to_word = reader.load_vocabulary(vocab_path)
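# A minimal inference sketch against the imported graph (an assumption, not
# part of the original class: `word_ids`, `seq_lens`, and `k` are inputs the
# caller prepares; feeding and fetching by tensor name is TF 1.x behavior):

def predict_top_k(self, sess, word_ids, seq_lens, k):
    # Fetch the probabilities and the indices of the k most likely words
    # (the ":1" suffix above selects the indices output of tf.nn.top_k).
    probs, top_k_ids = sess.run(
        [self.model_probs, self.model_top_k_result],
        feed_dict={self.input_word_ids: word_ids,
                   self.model_length: seq_lens,
                   self.top_k: k})
    return probs, top_k_ids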
import os

import reader
import tensorflow as tf
import numpy as np
from flask import Flask  # Flask(__name__) below requires this import

import bigru2layers_dev  # model definition instantiated below

app = Flask(__name__)

relation_data_dir = '../../data/relation'
model_path = '../save_models/bigru2layers_dev_14421'
word_embedding_path = '../../../data/glove/glove.6B.300d.txt'
relation_embedding_path = '../../data/transE/relation_embeddings.txt'

# Serve on CPU only.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

id2tagsinwords_map = reader.buildID2TagsInWordsMap(relation_data_dir)
word_to_id, word_embedding = reader.load_vocabulary(word_embedding_path)
relation_embedding = reader.load_relation_embeddings(relation_embedding_path)

# Build the serving model with batch size 1 (one query at a time).
graph = bigru2layers_dev.BiGRU2LayersDev(len(word_embedding),
                                         len(relation_embedding),
                                         word_embedding, relation_embedding,
                                         batch=1)
# Alternative model variants:
# graph = bigru.BiGRU(len(word_embedding), len(relation_embedding),
#                     word_embedding, relation_embedding, batch=1)
# graph = bigru2layers.BiGRU2Layers(len(word_embedding), len(relation_embedding),
#                                   word_embedding, relation_embedding, batch=1)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
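# A sketch of how the query endpoint might look (an assumption: the route
# name, the `question` request field, the OOV id 0, and graph.predict are
# all hypothetical; saver.restore and model_path come from the setup above):

from flask import jsonify, request

saver.restore(sess, model_path)

@app.route('/predict', methods=['POST'])
def predict():
    question = request.json['question']
    word_ids = [word_to_id.get(w, 0) for w in question.split()]  # 0 assumed OOV
    relation_id = graph.predict(sess, word_ids)  # hypothetical helper
    return jsonify({'relation_id': int(relation_id)})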
        if not is_special:
            common_words |= set(alphanumerics)

    # with open(COMMON_WORDS_SAVEPATH, 'w') as common_file:
    #     for w in common_words:
    #         common_file.write(w + '\n')
    #
    # with open(SPECIAL_WORDS_SAVEPATH, 'w') as special_file:
    #     for w in special_words:
    #         special_file.write(w + '\n')

    return special_words, common_words


if __name__ == '__main__':
    vocabulary = load_vocabulary()
    unique_special_words = set()
    unique_common_words = set()

    #########################
    ##### Training Data #####
    #########################

    ## load tokens extracted from train text
    with open(config.train_tokens_path) as text_file:
        text_dict = json.load(text_file)
    print('finished loading processed training text')

    special_words, common_words = extract_special_words_from_docs(text_dict,
                                                                  vocabulary)

    ## save common and special words extracted
    with open(config.special_words_train_savepath, 'w') as output_file:
        json.dump(special_words, output_file, indent=2)
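# A minimal sketch of the split this module appears to implement (an
# assumption: tokens present in the pre-trained vocabulary count as common
# and out-of-vocabulary tokens as special; split_words is hypothetical):

def split_words(tokens, vocabulary):
    special, common = set(), set()
    for token in tokens:
        (common if token in vocabulary else special).add(token)
    return special, common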