Example No. 1
def main(_):
    vocab = reader.get_vocab(FLAGS.vocab)
    test_ids, test_meta = reader.make_test(PDPATH('/test_data/'+FLAGS.test), vocab)
    model_path = PDPATH('/trained_models/') + FLAGS.model
    config = load_configs(model_path)


    with tf.Graph().as_default() as graph:
        with tf.Session() as session:
            test_input = TestData(config = config,
                                  test_data = test_ids,
                                  test_meta = test_meta,
                                  vocab=vocab,
                                  name="TestInput")

            with tf.variable_scope("Model"):
                mtest = Basic_LSTM_Model(is_training=False, config=config, input_=test_input)

            saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
            # Restore from the first checkpoint whose .meta file is found.
            saved_files = os.listdir(model_path)
            for file in saved_files:
                if '.meta' in file:
                    ckpt = file.split(sep='.')[0]
                    saver.restore(session, os.path.join(model_path, ckpt))
                    break


            np.set_printoptions(precision=4, suppress=False, linewidth=100)
            b = run_test(session=session, model=mtest, model_input=test_input)
            print(b)
            # Normalize each row so the scores sum to 1.
            b = b / np.sum(b, axis=1).reshape([-1, 1])
            print(b)
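
For reference, a minimal standalone sketch of the row normalization used at the end of this example (assuming non-zero row sums):

import numpy as np

b = np.array([[2.0, 6.0, 2.0],
              [1.0, 1.0, 2.0]])
# Divide each row by its row sum so every row becomes a distribution.
probs = b / np.sum(b, axis=1).reshape([-1, 1])
print(probs)                # [[0.2 0.6 0.2], [0.25 0.25 0.5]]
print(probs.sum(axis=1))    # [1. 1.]
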
Example No. 2
    def __init__(self,
                 data_bundle,
                 vocabulary=None):  #, c_len=None, q_len=None):
        (self.contexts, self.questions, self.choices, self.labels,
         self.choices_map, self.context_lens, self.qs_lens) = data_bundle
        if vocabulary:
            self.vocab = vocabulary
        else:
            self.vocab = rn.get_vocab(self.questions,
                                      self.contexts,
                                      min_frequency=10)
        self.vocab_size = len(self.vocab.vocabulary_)

        self.labels_idx = sorted(
            list(
                set([choice for choices in self.choices
                     for choice in choices])))
        print(self.contexts[:10])
        print(self.questions[:10])
        print(self.labels[:10])
        self.transformed_labels_idx = [
            x[0] for x in list(self.vocab.transform(self.labels_idx))
        ]
        print(self.transformed_labels_idx)

        self.contexts = rn.vocab_transform(self.contexts, self.vocab)
        self.questions = rn.vocab_transform(self.questions, self.vocab)
Example No. 3
    def __init__(self, data_bundle, vocabulary=None):#, c_len=None, q_len=None):
        (self.contexts, self.questions, self.choices, self.labels,
            self.choices_map, self.context_lens, self.qs_lens) = data_bundle
        if vocabulary:
            self.vocab = vocabulary
        else:
            self.vocab = rn.get_vocab(
                self.questions, self.contexts, min_frequency=10)
        self.vocab_size = len(self.vocab.vocabulary_)

        self.labels_idx = sorted(
            list(set([choice for choices in self.choices for choice in choices]))
        )
        self.transformed_labels_idx = [
            x[0] for x in list(self.vocab.transform(self.labels_idx))
        ]
        print(self.transformed_labels_idx)
        print([x for x in self.questions if '@placeholder' not in x.split(" ")])

        self.contexts = rn.vocab_transform(self.contexts, self.vocab)
        self.questions = rn.vocab_transform(self.questions, self.vocab)
        placeholder_token = ['@placeholder']
        placeholder_idx = rn.vocab_transform(placeholder_token, self.vocab)[0][0]
        print(placeholder_idx)
        print(len([x for x in self.questions if placeholder_idx in x]))
        self.placeholder_inds = np.array(
            [list(x).index(placeholder_idx) for x in self.questions]).astype(int)
        print(self.placeholder_inds.shape)
Example No. 4
def vocab_demo():
    v = rd.get_vocab('ptb.voc')
    items = [
        'the', 'dog', 'dogs', 'boy', 'boys', 'is', 'are', 'has', 'have', 'was',
        'were'
    ]
    for i in items:
        print(i, v.getid(i))
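
This demo assumes the object returned by rd.get_vocab exposes a getid lookup. The real vocabulary class is not shown on this page; a hypothetical minimal stand-in with the same interface might look like this:

class Vocab:
    """Hypothetical minimal vocabulary exposing the getid interface used above."""
    def __init__(self, words):
        self._ids = {w: i for i, w in enumerate(words)}

    def getid(self, word, unk_id=0):
        # Return the word's integer id, or unk_id for out-of-vocabulary words.
        return self._ids.get(word, unk_id)

v = Vocab(['<unk>', 'the', 'dog', 'is'])
print(v.getid('dog'))      # 2
print(v.getid('zebra'))    # 0 (unknown)
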
Example No. 5
def infer():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.vocab_size
    tar_vocab_size = args.vocab_size
    batch_size = args.batch_size
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size
    attr_init = args.attr_init
    latent_size = 32

    if args.enable_ce:
        fluid.default_main_program().random_seed = 102
        framework.default_startup_program().random_seed = 102

    model = VAE(hidden_size,
                latent_size,
                src_vocab_size,
                tar_vocab_size,
                batch_size,
                num_layers=num_layers,
                init_scale=init_scale,
                attr_init=attr_init)

    beam_size = args.beam_size
    trans_res = model.build_graph(mode='sampling', beam_size=beam_size)
    # clone from default main program and use it as the validation program
    main_program = fluid.default_main_program()

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    dir_name = args.reload_model
    print("dir name", dir_name)
    dir_name = os.path.join(dir_name, "checkpoint")
    fluid.load(main_program, dir_name, exe)
    vocab, tar_id2vocab = get_vocab(args.dataset_prefix)
    infer_output = np.ones((batch_size, 1), dtype='int64') * BOS_ID

    fetch_outs = exe.run(feed={'tar': infer_output},
                         fetch_list=[trans_res.name],
                         use_program_cache=False)

    with io.open(args.infer_output_file, 'w', encoding='utf-8') as out_file:

        for line in fetch_outs[0]:
            end_id = -1
            if EOS_ID in line:
                end_id = np.where(line == EOS_ID)[0][0]
            new_line = [tar_id2vocab[e[0]] for e in line[1:end_id]]
            out_file.write(space_tok.join(new_line))
            out_file.write(line_tok)
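
The write loop above trims each generated sequence at the first end-of-sentence id before mapping ids back to tokens. A small standalone sketch of that trimming step, with made-up ids and a toy tar_id2vocab:

import numpy as np

EOS_ID = 2
tar_id2vocab = {0: '<s>', 1: 'hello', 2: '</s>', 3: 'world'}

line = np.array([0, 1, 3, 2, 3])   # generated id sequence (BOS, tokens, EOS, ...)
end_id = -1
if EOS_ID in line:
    end_id = np.where(line == EOS_ID)[0][0]
# Skip the leading BOS token and stop before the first EOS.
tokens = [tar_id2vocab[int(e)] for e in line[1:end_id]]
print(' '.join(tokens))            # "hello world"
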
Example No. 6
def main():
    import reader as reader
    from trainer import Configs
    from PDPATH import PDPATH

    ptb_vocab = get_vocab('ptb.voc')
    raw_test_data = reader.make_test(PDPATH('/RNN/test_data/coffee.txt'), ptb_vocab)

    test_input = TestData(config=Configs(),
                          test_data=raw_test_data,
                          vocab=ptb_vocab,
                          name="TestInput")
Example No. 7
    def __init__(self, config, data_path=None, vocabulary=None, name=None):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        raw_context, raw_questions, raw_choices, raw_labels, self.choices_map = \
                read.load_data(data_path)
        all_choices = read.build_choices(raw_choices)
        self.epoch_size = ((len(raw_context) // batch_size) - 1) // num_steps
        # build vocab for train data
        if not vocabulary:
            self.vocabulary = read.get_vocab(raw_questions, raw_context,
                                             min_frequency=500)
        else:
            self.vocabulary = vocabulary

        raw_choices = [" ".join(x) for x in raw_choices]
        self.all_choices = read.vocab_transform(all_choices, self.vocabulary)
        self.questions = read.vocab_transform(raw_questions, self.vocabulary)
        self.context = read.vocab_transform(raw_context, self.vocabulary)
        self.labels = read.vocab_transform(raw_labels, self.vocabulary)
        # raw_choices has already been joined into strings above.
        self.choices = read.vocab_transform(raw_choices, self.vocabulary)
Example No. 8
    def __init__(self, data_bundle, vocabulary=None):
        (contexts, questions, self.choices, self.labels, self.choices_map,
         self.context_lens, self.qs_lens) = data_bundle

        if vocabulary:
            self.vocab = vocabulary
        else:
            self.vocab = rn.get_vocab(questions,
                                      contexts,
                                      min_frequency=FLAGS.min_freq)
        self.vocab_size = len(self.vocab.vocabulary_)

        self.labels_idx = sorted(
            list(
                set([choice for choices in self.choices
                     for choice in choices])))

        contexts = rn.vocab_transform(contexts, self.vocab)
        self.contexts = rn.pad_eval(contexts, FLAGS.context_steps)

        questions = rn.vocab_transform(questions, self.vocab)
        self.questions = rn.pad_eval(questions, FLAGS.question_steps)
Example No. 9
logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        datefmt='%m-%d %H:%M',
        filename=datetime.now().strftime('mylogfile_%H_%M_%d_%m_%Y.log'),
        filemode='w')


if __name__ == "__main__":

    #    tf.reset_default_graph()
    options = get_params()
    root = "/Users/liuhongbing/Documents/tensorflow/data/snli_1.0/"
    train = [l.strip().split('\t') for l in open(root + 'snli_1.0_train.txt')]
    dev = [l.strip().split('\t') for l in open(root + 'snli_1.0_dev.txt')]
    test = [l.strip().split('\t') for l in open(root + 'snli_1.0_test.txt')]
    vocab = get_vocab(train)
    print("vocab (incr. maxfeatures accordingly):", len(vocab))

    X_train, Y_train, Z_train = load_data(train, vocab)
    X_dev, Y_dev, Z_dev = load_data(dev, vocab)
    X_test, Y_test, Z_test = load_data(test, vocab)
    print('Build model...')

    model = build_model(options)

    config_str = getConfig(options)
    MODEL_ARCH = root + "/Attention_neural/arch_att" + config_str + ".yaml"
    MODEL_WGHT = root + "/Attention_neural/weights_att" + config_str + ".weights"

    MAXLEN = options.xmaxlen
    X_train = pad_sequences(X_train,
Example No. 10
with open('POS/noun_inanim.txt', 'r') as file:
    all = file.readlines()
    iNs = all[0][:-2].split(sep=',')
    iNp = all[1].split(sep=',')

with open('POS/verb_trans.txt', 'r') as file:
    all = file.readlines()
    tVs = all[0][:-2].split(sep=',')
    tVp = all[1].split(sep=',')

with open('POS/verb_intrans.txt', 'r') as file:
    all = file.readlines()
    iVs = all[0][:-2].split(sep=',')
    iVp = all[1].split(sep=',')

with open('ptb_adjs.txt', 'r') as file:
    A = file.readline().split(sep=',')
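
# The animate-noun lists aNs / aNp are printed below, but the file they come from
# is not part of this snippet; assuming a file parallel to the others (the name
# 'POS/noun_anim.txt' is a guess), the missing read would look like:
with open('POS/noun_anim.txt', 'r') as file:
    all = file.readlines()
    aNs = all[0][:-2].split(sep=',')
    aNp = all[1].split(sep=',')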

sample = random.randint(20, 50)
print('Here are some singular samples:')
print(
    '  Anim: {}\n  Inanim: {}\n  Trans: {}\n  Intrans: {}\n  Adj: {}\n'.format(
        aNs[sample], iNs[sample], tVs[sample], iVs[sample], A[sample]))
print('And here are some plural samples:')
print(
    '  Anim: {}\n  Inanim: {}\n  Trans: {}\n  Intrans: {}\n  Adj: {}\n'.format(
        aNp[sample], iNp[sample], tVp[sample], iVp[sample], A[sample + 1]))

V = rd.get_vocab('big_ptb.voc')
Example No. 11
from reader import load_data
from reader import get_vocab
from reader import vocab_transform
from reader import batch_iter


contexts, questions, choices, labels, choices_map, context_lens, qs_lens =\
    load_data(data_path="wdw/test")

# 2. Fit vocabulary with questions and context.
vocab = get_vocab(contexts, questions)

# 3. Transform context and questions.
contexts = vocab_transform(contexts, vocab)
questions = vocab_transform(questions, vocab)

# 4. Give to batch_iter
readers = batch_iter(contexts, questions, choices, labels, choices_map,
                     context_lens, qs_lens)

# for q, c, ch, lab, ch_map, c_lens, q_lens in readers:
#     print(c.shape)
#     break