def do_shell(args):
    """Restore a trained WindowModel and run an interactive tagging REPL.

    Loads the saved config/helper/embeddings from ``args.model_path``,
    rebuilds the model graph, restores the checkpoint, then reads
    space-tokenized sentences from stdin and prints the predicted label
    for each token until EOF (Ctrl-D).

    Args:
        args: parsed CLI namespace; reads ``args.model_path``.
            # assumes load_embeddings also consults other attrs of args — TODO confirm
    """
    config = Config(args.model_path)
    helper = ModelHelper.load(args.model_path)
    embeddings = load_embeddings(args, helper)
    # Embedding width is only known after the vectors are loaded.
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...", )
        start = time.time()
        model = WindowModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - start)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            # Initialize variables first, then overwrite them with the
            # trained weights from the checkpoint.
            session.run(init)
            saver.restore(session, model.config.model_output)

            print("""Welcome!
You can use this shell to explore the behavior of your model.
Please enter sentences with spaces between tokens, e.g.,
input> Germany 's representative to the European Union 's veterinary committee .
""")
            while True:
                # Create simple REPL
                try:
                    # NOTE: raw_input is Python-2 only; under Python 3 this
                    # would need input().
                    sentence = raw_input("input> ")
                    tokens = sentence.strip().split(" ")
                    # Dummy "O" labels: model.output expects (tokens, labels)
                    # pairs, but gold labels are unknown at the shell.
                    for sentence, _, predictions in model.output(session, [(tokens, ["O"] * len(tokens))]):
                        predictions = [LBLS[l] for l in predictions]
                        # Empty strings stand in for the (absent) gold labels.
                        print_sentence(sys.stdout, sentence, [""] * len(tokens), predictions)
                except EOFError:
                    print("Closing session.")
                    break
def do_evaluate(args):
    """Restore a trained WindowModel and write its predictions for a dataset.

    Loads config/helper/embeddings from ``args.model_path``, reads CoNLL-format
    data from ``args.data``, restores the checkpoint, and prints each sentence
    with its gold labels and predicted labels to ``args.output``.

    Args:
        args: parsed CLI namespace; reads ``args.model_path``, ``args.data``,
            and ``args.output`` (an open writable stream, judging by its use
            in print_sentence — TODO confirm).
    """
    config = Config(args.model_path)
    helper = ModelHelper.load(args.model_path)
    input_data = read_conll(args.data)
    embeddings = load_embeddings(args, helper)
    # Embedding width is only known after the vectors are loaded.
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...", )
        start = time.time()
        model = WindowModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - start)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            # Initialize, then overwrite with the trained checkpoint weights.
            session.run(init)
            saver.restore(session, model.config.model_output)
            for sentence, labels, predictions in model.output(session, input_data):
                # Map label indices back to their string names.
                predictions = [LBLS[l] for l in predictions]
                print_sentence(args.output, sentence, labels, predictions)
# NOTE(review): this is a byte-for-byte duplicate of the do_evaluate defined
# earlier in this file (only a line wrap differs). Being defined second, it
# silently shadows the first definition. One of the two should be deleted.
def do_evaluate(args):
    """Restore a trained WindowModel and write its predictions for a dataset.

    Duplicate of the earlier do_evaluate: loads config/helper/embeddings from
    ``args.model_path``, reads CoNLL data from ``args.data``, restores the
    checkpoint, and prints sentence/gold/predicted triples to ``args.output``.
    """
    config = Config(args.model_path)
    helper = ModelHelper.load(args.model_path)
    input_data = read_conll(args.data)
    embeddings = load_embeddings(args, helper)
    # Embedding width is only known after the vectors are loaded.
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...", )
        start = time.time()
        model = WindowModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - start)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            # Initialize, then overwrite with the trained checkpoint weights.
            session.run(init)
            saver.restore(session, model.config.model_output)
            for sentence, labels, predictions in model.output(
                    session, input_data):
                # Map label indices back to their string names.
                predictions = [LBLS[l] for l in predictions]
                print_sentence(args.output, sentence, labels, predictions)
def do_train(args):
    """Load pickled data/embeddings, build a GlassdoorModel, and fit it.

    Replaces the original load_and_preprocess_data/load_embeddings pipeline
    with direct pickle loads (produced by words_to_vecs.py). Currently wired
    to the small test dataset: the first 3/4 of data/test_data.pickle is used
    for training and the remainder for dev. Drops into an interactive shell
    (code.interact) after fitting so the session can be explored.

    Args:
        args: parsed CLI namespace. Currently unused here, but kept for
            signature compatibility with the other do_* subcommands.
    """
    # Set up some parameters.
    config = Config()

    # --- Load the data (created by words_to_vecs.py) ---
    load_start = time.localtime()
    # NOTE: str(struct_time) prints the raw tuple repr; time.asctime() would
    # be friendlier, but keep output unchanged for now.
    print(str(load_start) + ": Loading data ...")
    helper = load_pickle('data/py2_data/public_companies_helper.pickle')
    helper = ModelHelper(helper, config.rev_length)

    # Toy dataset for smoke-testing: first 3/4 train, last 1/4 dev.
    # (The full py3_data train/dev/test pickles used previously have been
    # removed as dead code; see VCS history if they need to be restored.)
    data = load_pickle('data/test_data.pickle')
    cutoff = int(3 * len(data) / 4)
    train = data[0:cutoff]
    train_raw = train
    dev = data[cutoff:]
    dev_raw = dev

    load_end = time.localtime()
    print(str(load_end) + ": Finished loading data ...")

    # --- Select embeddings (replaces load_embeddings) ---
    if config.embed_type == 0:
        # Integer "embeddings": one-hot encoding takes up too much memory,
        # so each token is represented by a bare integer id.
        embeddings = range(0, 10000)
    elif config.embed_type == 1:
        embeddings = load_pickle(
            'data/py2_data/public_companies_word2vec_embeddings.pickle')
        embeddings = embeddings.astype(np.float32)
    elif config.embed_type == 2:
        embeddings = load_pickle(
            'data/py2_data/public_companies_glove_embeddings.pickle')
    elif config.embed_type == 3:
        embeddings = load_pickle('data/test_cove_embeddings.pickle')
    else:
        # Fixed typo: "Invaid" -> "Invalid".
        print("Invalid embedding type:", config.embed_type, ". Debugging...")
        # NOTE(review): if the interactive session is exited without binding
        # `embeddings`, the shape[1] access below raises NameError.
        code.interact(local=locals())

    if config.embed_type == 0:
        # Integer embeddings are scalar per token, so the embedding "width"
        # is 1 (i.e. just use the integer vectors that are passed in).
        config.embed_size = 1
    else:
        config.embed_size = embeddings.shape[1]

    # Ensure the output directory exists before any artifacts are written.
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    report = None  # Report(Config.eval_output) disabled for now.

    with tf.Graph().as_default():
        logger.info("Building model...", )
        start = time.time()
        model = GlassdoorModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - start)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init)
            model.fit(session, saver, train, dev)
            print("Model fit complete. No other code to run...")
            # Drop into a REPL so the trained session can be inspected.
            code.interact(local=locals())