示例#1
0
def do_shell(args):
    """Interactive shell: load a trained WindowModel and tag typed sentences.

    ``args`` must provide ``model_path`` (directory holding the saved config,
    helper, and weights) plus whatever ``load_embeddings`` reads. Reads
    whitespace-tokenized sentences from stdin until EOF (Ctrl-D).
    """
    config = Config(args.model_path)
    helper = ModelHelper.load(args.model_path)
    embeddings = load_embeddings(args, helper)
    # Embedding width comes from the loaded vectors, not the saved config.
    config.embed_size = embeddings.shape[1]

    # BUG FIX: the original called raw_input, which only exists on Python 2,
    # while the rest of this file uses print() as a function. Resolve the
    # line-reading builtin once so the shell works on both 2 and 3.
    try:
        read_line = raw_input  # noqa: F821 -- Python 2 only
    except NameError:
        read_line = input

    with tf.Graph().as_default():
        logger.info("Building model...", )
        start = time.time()
        model = WindowModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - start)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init)
            saver.restore(session, model.config.model_output)

            print("""Welcome!
You can use this shell to explore the behavior of your model.
Please enter sentences with spaces between tokens, e.g.,
input> Germany 's representative to the European Union 's veterinary committee .
""")
            while True:
                # Create simple REPL
                try:
                    sentence = read_line("input> ")
                    tokens = sentence.strip().split(" ")
                    # Dummy "O" gold labels are supplied because model.output
                    # expects (tokens, labels) pairs; only predictions are used.
                    # Loop variable renamed from `sentence` to avoid shadowing
                    # the raw input string read above.
                    for sent, _, predictions in model.output(session, [(tokens, ["O"] * len(tokens))]):
                        predictions = [LBLS[l] for l in predictions]
                        print_sentence(sys.stdout, sent, [""] * len(tokens), predictions)
                except EOFError:
                    print("Closing session.")
                    break
示例#2
0
def do_evaluate(args):
    """Evaluate a saved WindowModel on a CoNLL file.

    Restores the model from ``args.model_path``, runs it over the sentences
    read from ``args.data``, and writes each sentence with its gold and
    predicted labels to ``args.output``.
    """
    config = Config(args.model_path)
    helper = ModelHelper.load(args.model_path)
    input_data = read_conll(args.data)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...", )
        build_start = time.time()
        model = WindowModel(helper, config, embeddings)

        logger.info("took %.2f seconds", time.time() - build_start)

        init_op = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init_op)
            saver.restore(session, model.config.model_output)
            # Emit gold labels alongside model predictions, one sentence at a time.
            for sentence, labels, predictions in model.output(session, input_data):
                predicted = [LBLS[label_idx] for label_idx in predictions]
                print_sentence(args.output, sentence, labels, predicted)
示例#3
0
def do_evaluate(args):
    """Run a restored WindowModel over CoNLL data and print labeled output.

    Loads config/helper/embeddings from ``args.model_path``, tags the
    sentences in ``args.data``, and prints gold vs. predicted labels
    to ``args.output``.
    """
    config = Config(args.model_path)
    helper = ModelHelper.load(args.model_path)
    input_data = read_conll(args.data)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...", )
        t0 = time.time()
        model = WindowModel(helper, config, embeddings)

        logger.info("took %.2f seconds", time.time() - t0)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init)
            saver.restore(session, model.config.model_output)
            results = model.output(session, input_data)
            # Map numeric label indices back to tag strings before printing.
            for sentence, labels, predictions in results:
                tags = [LBLS[p] for p in predictions]
                print_sentence(args.output, sentence, labels, tags)
示例#4
0
def do_train(args):
    """Train a GlassdoorModel on pickled review data.

    Loads a pickled helper and dataset, selects an embedding source based on
    ``config.embed_type``, builds the model graph, and runs ``model.fit``.
    Drops into an interactive console when training finishes (or when an
    unknown embed_type is configured).
    """
    # Set up some parameters.
    config = Config()
    # helper, train, dev, train_raw, dev_raw = load_and_preprocess_data(args) -- REPLACE THIS FUNCTION!
    ## REPLACEMENT
    # Load the data
    load_start = time.localtime()
    print(str(load_start) + ": Loading data ...")

    helper = load_pickle('data/py2_data/public_companies_helper.pickle'
                         )  # created by words_to_vecs.py
    helper = ModelHelper(helper, config.rev_length)

    # Used for testing toy models: first 3/4 of the file is train, the
    # remainder is dev (no held-out test split in this toy path).
    data = load_pickle('data/test_data.pickle')  # created by words_to_vecs.py
    cutoff = int(3 * len(data) / 4)
    train = data[0:cutoff]
    train_raw = train
    dev = data[cutoff:]
    dev_raw = dev

    # # Used for actual train/dev/test
    # train = load_pickle('data/py3_data/public_companies_train_data.pickle')
    # train = train[0:100000]
    # dev = train[200000:210000]
    # test = train[300000:310000]

    # Old data
    # dev = load_pickle('data/py3_data/public_companies_dev_data.pickle')
    # cut = int(len(dev)/2)
    # dev = dev[0:cut]
    # test = load_pickle('data/py3_data/public_companies_test_data.pickle')

    load_end = time.localtime()
    print(str(load_end) + ": Finished loading data ...")

    # embeddings = load_embeddings(args, helper) -- REPLACE THIS FUNCTION
    if config.embed_type == 0:
        # Use integer embedding. One hot encoding takes up too much memory.
        # NOTE(review): on Python 3 this is a lazy range object, not a list;
        # presumably only indexed downstream -- confirm GlassdoorModel accepts it.
        embeddings = range(0, 10000)
    elif config.embed_type == 1:
        embeddings = load_pickle(
            'data/py2_data/public_companies_word2vec_embeddings.pickle')
        embeddings = embeddings.astype(np.float32)
    elif config.embed_type == 2:
        embeddings = load_pickle(
            'data/py2_data/public_companies_glove_embeddings.pickle')
    elif config.embed_type == 3:
        embeddings = load_pickle('data/test_cove_embeddings.pickle')
    else:
        # BUG FIX: corrected "Invaid" -> "Invalid" in the error message.
        print("Invalid embedding type:", config.embed_type, ". Debugging...")
        code.interact(local=locals())

    ## REPLACEMENT
    # - Skip for first test
    if config.embed_type == 0:
        config.embed_size = 1  # FOR AN INITIAL TEST RUN JUST SET THE EMBEDDING SIZE TO 1 (i.e. just use the integer vectors that are passed in)
    else:
        config.embed_size = embeddings.shape[1]
    # # OLD CODE
    # helper.save(config.output_path)
    ## REPLACEMENT
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    # handler = logging.FileHandler(config.log_output)
    # handler.setLevel(logging.DEBUG)
    # handler.setFormatter(logging.Formatter('%(asctime)s:%(levelname)s: %(message)s'))
    # logging.getLogger().addHandler(handler)

    report = None  #Report(Config.eval_output)

    with tf.Graph().as_default():
        logger.info("Building model...", )
        start = time.time()
        model = GlassdoorModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - start)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init)

            model.fit(session, saver, train, dev)
            print("Model fit complete. No other code to run...")
            code.interact(local=locals())