Example #1
def train(model: MemN2N, train_data, valid_data, config):
    """
    Train the model, decaying the learning rate whenever the validation loss stops improving.

    Args:
        model (MemN2N): the model to be trained
        train_data: training data
        valid_data: validating data
        config: model and training configs
    
    Returns:
        None
    """
    lr = config.init_lr

    train_losses = []
    train_perplexities = []

    valid_losses = []
    valid_perplexities = []
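    # Perplexity is the exponential of the average cross-entropy loss; it is
    # tracked per epoch for both the training and validation sets.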

    for epoch in range(1, config.nepoch + 1):
        train_loss = train_single_epoch(model, lr, train_data, config)
        valid_loss = eval(model, valid_data, config, "Validation")

        info = {'epoch': epoch, 'learning_rate': lr}

        # If the validation loss has not dropped by at least 0.01%, divide the learning rate by 1.5
        if len(valid_losses) > 0 and valid_loss > valid_losses[-1] * 0.9999:
            lr /= 1.5

        train_losses.append(train_loss)
        train_perplexities.append(math.exp(train_loss))

        valid_losses.append(valid_loss)
        valid_perplexities.append(math.exp(valid_loss))

        info["train_perplexity"] = train_perplexities[-1]
        info["validate_perplexity"] = valid_perplexities[-1]

        print(info)

        if epoch % config.log_epoch == 0:
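            # Periodically checkpoint the model weights and the current learning rate.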
            save_dir = os.path.join(config.checkpoint_dir, "model_%d" % epoch)
            paddle.save(model.state_dict(), save_dir)
            lr_path = os.path.join(config.checkpoint_dir, "lr_%d" % epoch)
            with open(lr_path, "w") as f:
                f.write(f"{lr}")

        # Stop early once the target validation perplexity is reached
        if info["validate_perplexity"] < config.target_ppl:
            save_dir = os.path.join(config.checkpoint_dir, "model_good")
            paddle.save(model.state_dict(), save_dir)
            break

        if lr < 1e-5:
            break

    save_dir = os.path.join(config.checkpoint_dir, "model")
    paddle.save(model.state_dict(), save_dir)
Example #2
def train_single_epoch(model: MemN2N, lr, data, config):
    """
    train one epoch

    Args:
        model (MemN2N): model to be trained
        lr (float): the learning rate of this epoch
        data: training data
        config: configs

    Returns:
        float: average loss
    """
    model.train()
    N = int(math.ceil(len(data) / config.batch_size))  # number of batches per epoch

    clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=config.max_grad_norm)
    optimizer = paddle.optimizer.SGD(learning_rate=lr,
                                     parameters=model.parameters(),
                                     grad_clip=clip)
    lossfn = nn.CrossEntropyLoss(reduction='sum')

    total_loss = 0
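    # total_loss accumulates the summed cross-entropy of every batch; dividing
    # by N * batch_size at the end gives the average per-token loss.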

    if config.show:
        ProgressBar = getattr(import_module('utils'), 'ProgressBar')
        bar = ProgressBar('Train', max=N)

    for batch in range(N):
        if config.show:
            bar.next()

        optimizer.clear_grad()
        context = np.ndarray([config.batch_size, config.mem_size],
                             dtype=np.int64)
        target = np.ndarray([config.batch_size], dtype=np.int64)
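        # Build the batch from random positions: the mem_size tokens preceding
        # each sampled position form the memory, and the token at the position
        # itself is the next-word target.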
        for i in range(config.batch_size):
            m = random.randrange(config.mem_size, len(data))
            target[i] = data[m]
            context[i, :] = data[m - config.mem_size:m]

        batch_data = paddle.to_tensor(context)
        batch_label = paddle.to_tensor(target)

        predict = model(batch_data)
        loss = lossfn(predict, batch_label)
        loss.backward()
        optimizer.step()
        total_loss += loss

    if config.show:
        bar.finish()

    return total_loss / N / config.batch_size
Example #3
def eval(model: MemN2N, data, config, mode="Test"):
    """
    evaluate the model performance

    Args:
        model (MemN2N): the model to be evaluated
        data: evaluation data
        config: model and eval configs
        mode (str): label for the progress bar, e.g. "Validation" or "Test"
    
    Returns:
        average loss
    """
    model.eval()
    lossfn = nn.CrossEntropyLoss(reduction='sum')
    N = int(math.ceil(len(data) / config.batch_size))
    total_loss = 0

    context = np.ndarray([config.batch_size, config.mem_size], dtype=np.int64)
    target = np.ndarray([config.batch_size], dtype=np.int64)

    if config.show:
        ProgressBar = getattr(import_module('utils'), 'ProgressBar')
        bar = ProgressBar(mode, max=N - 1)

    m = config.mem_size
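    # Unlike training, evaluation slides the window over the data sequentially
    # instead of sampling random positions.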
    for batch in range(N):
        if config.show:
            bar.next()

        for i in range(config.batch_size):
            if m >= len(data):
                break
            target[i] = data[m]
            context[i, :] = data[m - config.mem_size:m]
            m += 1
        if m >= len(data):
            break

        batch_data = paddle.to_tensor(context)
        batch_label = paddle.to_tensor(target)

        predict = model(batch_data)
        loss = lossfn(predict, batch_label)

        total_loss += loss

    if config.show:
        bar.finish()

    return total_loss / N / config.batch_size
Example #4
def main(_):
    train_file = 'data/data_1_train.csv'
    source_count, target_count = [], []
    data = process_data.read_data(train_file)

    parsed_data = process_data.parse_data(data)

    source_word2idx, target_word2idx = create_vocab(parsed_data)

    #train_data = read_data(FLAGS.train_data, source_count, source_word2idx, target_count, target_word2idx)
    #test_data = read_data(FLAGS.test_data, source_count, source_word2idx, target_count, target_word2idx)

    trainData, testData = process_data.split_data(parsed_data, 80, 20)
    train_data = process_data.read_and_process_data(trainData, source_word2idx,
                                                    target_word2idx)
    test_data = process_data.read_and_process_data(testData, source_word2idx,
                                                   target_word2idx)
    FLAGS.pad_idx = source_word2idx['<pad>']
    FLAGS.nwords = len(source_word2idx)
    FLAGS.mem_size = train_data[
        4] if train_data[4] > test_data[4] else test_data[4]

    pp.pprint(flags.FLAGS.__flags)

    print('loading pre-trained word vectors...')
    FLAGS.pre_trained_context_wt = init_word_embeddings(source_word2idx)
    FLAGS.pre_trained_target_wt = init_word_embeddings(target_word2idx)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        model.run(train_data, test_data)
Example #5
def main(_):
    source_word2idx, target_word2idx, word_set = {}, {}, {}
    max_sent_len = -1

    max_sent_len = get_dataset_resources(FLAGS.train_data, source_word2idx,
                                         target_word2idx, word_set,
                                         max_sent_len)
    max_sent_len = get_dataset_resources(FLAGS.test_data, source_word2idx,
                                         target_word2idx, word_set,
                                         max_sent_len)

    train_data = get_dataset(FLAGS.train_data, source_word2idx,
                             target_word2idx)
    test_data = get_dataset(FLAGS.test_data, source_word2idx, target_word2idx)

    # FLAGS.pad_idx = source_word2idx['<pad>']
    # FLAGS.nwords = len(source_word2idx)
    # FLAGS.mem_size = max_sent_len

    pp.pprint(flags.FLAGS.__flags)

    print('loading pre-trained word vectors...')
    print('loading pre-trained word vectors for train and test data')

    pre_trained_context_wt, pre_trained_target_wt = get_embedding_matrix(
        source_word2idx, target_word2idx, FLAGS.edim)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess, pre_trained_context_wt,
                       pre_trained_target_wt, source_word2idx['<pad>'],
                       len(source_word2idx), max_sent_len)
        model.build_model()
        model.run(train_data, test_data)
Example #6
def main(_):
    count = []
    word2idx = {}

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)

    train_data = read_data(
        '%s/%s.train.txt' % (FLAGS.data_dir, FLAGS.data_name), count, word2idx)
    valid_data = read_data(
        '%s/%s.valid.txt' % (FLAGS.data_dir, FLAGS.data_name), count, word2idx)
    test_data = read_data('%s/%s.test.txt' % (FLAGS.data_dir, FLAGS.data_name),
                          count, word2idx)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    FLAGS.nwords = len(word2idx)

    pp.pprint(flags.FLAGS.__flags)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()

        if FLAGS.is_test:
            model.run(valid_data, test_data)
        else:
            model.run(train_data, valid_data)
Example #7
def run(is_test=False):
    count = []
    word2idx = {}
    Config.is_test = is_test
    if not os.path.exists(Config.checkpoint_dir):
        os.makedirs(Config.checkpoint_dir)
    if not os.path.exists(Config.vector_dir):
        os.makedirs(Config.vector_dir)

    train_data = read_data(
        '%s/%s.train.txt' % (Config.data_dir, Config.data_name), count,
        word2idx)
    valid_data = read_data(
        '%s/%s.valid.txt' % (Config.data_dir, Config.data_name), count,
        word2idx)
    test_data = read_data(
        '%s/%s.test.txt' % (Config.data_dir, Config.data_name), count,
        word2idx)
    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    save_obj('%s/idx2word.pkl' % (Config.vector_dir), idx2word)
    save_obj('%s/word2idx.pkl' % (Config.vector_dir), word2idx)
    Config.nwords = len(word2idx)

    tf.reset_default_graph()
    with tf.Session() as sess:
        model = MemN2N(Config, sess, True)
        model.build_model()

        if Config.is_test:
            model.run(valid_data, test_data)
        else:
            model.run(train_data, valid_data)

        tf.summary.FileWriter("./logs", graph=tf.get_default_graph())
Example #8
def main():

    config = {
        'batch_size': 128,
        'emb_dim': 150,
        'mem_size': 100,
        'test': False,
        'n_epoch': 50,
        'n_hop': 6,
        'n_words': None,
        'lr': 0.001,
        'std_dev': 0.05,
        'cp_dir': 'checkpoints'
    }

    count = list()
    word2idx = dict()
    train_data = read_data('./data/ptb.train.txt', count, word2idx)
    valid_data = read_data('./data/ptb.valid.txt', count, word2idx)
    test_data = read_data('./data/ptb.test.txt', count, word2idx)

    config['n_words'] = len(word2idx)

    with tf.Session() as sess:
        print "Training..."
        mod = MemN2N(config, sess)
        mod.train(train_data, valid_data)
        mod.test(test_data)
Example #9
def main(_):

    word2idx = {}

    if not os.path.exists(FLAGS.checkpoint_dir):
      os.makedirs(FLAGS.checkpoint_dir)

    train_data, test_data = read_data(word2idx, FLAGS)
    train_data, valid_data = model_selection.train_test_split(train_data, test_size=.1)


    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    for i in range(FLAGS.mem_size):
        word2idx['time{}'.format(i + 1)] = 'time{}'.format(i + 1)
    FLAGS.nwords = len(word2idx)
    print('train data len:', len(train_data))
    print('valid data len:', len(valid_data))
    print('vocab len:', len(word2idx))
    print('story sample:', np.array(train_data[0][0]))
    # pp.pprint(flags.FLAGS.__flags)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()

        if FLAGS.is_test:
            model.run(valid_data, test_data, idx2word, FLAGS)
        else:
            model.run(train_data, valid_data, idx2word, FLAGS)
Example #10
    def __init__(self, config):
        self.eval_data = CBTestDataset(config.dataset_dir,
                                       config.word_type,
                                       perc_dict=config.perc_dict)
        self.eval_data.set_train_test(train=False)

        settings = {
            "use_cuda": config.cuda,
            "num_vocab": self.eval_data.num_vocab,
            "embedding_dim": 20,
            "sentence_size": self.eval_data.sentence_size,
            "max_hops": config.max_hops
        }

        print("Longest sentence length", self.eval_data.sentence_size)
        print("Longest story length", self.eval_data.max_story_size)
        print("Average story length", self.eval_data.mean_story_size)
        print("Number of vocab", self.eval_data.num_vocab)

        self.mem_n2n = MemN2N(settings)
        self.mem_n2n.load_state_dict(torch.load(config.check_point_path))
        self.mem_n2n.eval()
        print(self.mem_n2n)

        if config.cuda:
            self.mem_n2n = self.mem_n2n.cuda()

        self.start_epoch = 0
        self.config = config
Example #11
def main(_):
  source_count, target_count = [], []
  source_word2idx, target_word2idx, word_set = {}, {}, {}
  max_sent_len = -1
  
  max_sent_len = get_dataset_resources(FLAGS.train_data, source_word2idx, target_word2idx, word_set, max_sent_len)
  max_sent_len = get_dataset_resources(FLAGS.test_data, source_word2idx, target_word2idx, word_set, max_sent_len)
  embeddings = load_embedding_file(FLAGS.pretrain_file, word_set)

  train_data = get_dataset(FLAGS.train_data, source_word2idx, target_word2idx, embeddings)
  test_data = get_dataset(FLAGS.test_data, source_word2idx, target_word2idx, embeddings)

  print "train data size - ", len(train_data[0])
  print "test data size - ", len(test_data[0])

  print "max sentence length - ",max_sent_len
  FLAGS.pad_idx = source_word2idx['<pad>']
  FLAGS.nwords = len(source_word2idx)
  FLAGS.mem_size = max_sent_len

  pp.pprint(flags.FLAGS.__flags)

  print('loading pre-trained word vectors...')
  print('loading pre-trained word vectors for train and test data')
  
  FLAGS.pre_trained_context_wt, FLAGS.pre_trained_target_wt = get_embedding_matrix(embeddings, source_word2idx,  target_word2idx, FLAGS.edim)
  
  with tf.Session() as sess:
    model = MemN2N(FLAGS, sess)
    model.build_model()
    model.run(train_data, test_data)  
Example #12
def run(config):
    print("#! preparing data...")
    train_iter, valid_iter, test_iter, vocab = dataloader(
        config.batch_size, config.memory_size, config.task, config.joint,
        config.tenk)

    print("#! instantiating model...")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MemN2N(get_params(config), vocab).to(device)

    if config.file:
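        # Warm-start from a saved checkpoint, remapping tensors to GPU or CPU
        # depending on availability.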
        with open(os.path.join(config.save_dir, config.file), 'rb') as f:
            if torch.cuda.is_available():
                state_dict = torch.load(
                    f, map_location=lambda storage, loc: storage.cuda())
            else:
                state_dict = torch.load(
                    f, map_location=lambda storage, loc: storage)
            model.load_state_dict(state_dict)

    if config.train:
        print("#! training...")
        optimizer = optim.Adam(model.parameters(), config.lr)
        train(train_iter, model, optimizer, config.num_epochs, config.max_clip,
              valid_iter)
        if not os.path.isdir(config.save_dir):
            os.makedirs(config.save_dir)
        torch.save(model.state_dict(),
                   os.path.join(config.save_dir, get_fname(config)))

    print("#! testing...")
    with torch.no_grad():
        eval(test_iter, model, config.task)
Example #13
def main(_):
    source_count, target_count = [], []
    source_word2idx, target_word2idx = {}, {}

    train_data = read_data(FLAGS.train_data, source_count, source_word2idx,
                           target_count, target_word2idx)
    test_data = read_data(FLAGS.test_data, source_count, source_word2idx,
                          target_count, target_word2idx)

    FLAGS.pad_idx = source_word2idx['<pad>']
    FLAGS.nwords = len(source_word2idx)
    FLAGS.mem_size = train_data[
        4] if train_data[4] > test_data[4] else test_data[4]

    pp.pprint(flags.FLAGS.__flags)

    print('loading pre-trained word vectors...')
    FLAGS.pre_trained_context_wt = init_word_embeddings(source_word2idx)
    FLAGS.pre_trained_target_wt = init_word_embeddings(target_word2idx)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        model.run(train_data, test_data)
Example #14
def main(_):
  with tf.Session(config=tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5),
    device_count={'GPU': 1})) as sess:
    model = MemN2N(FLAGS, sess)
    model.build_model()

    model.test(example)
Example #15
    def __init__(self, config):

        if 'bAbI' in config.dataset_dir:
            self.train_data = bAbIDataset(config.dataset_dir, config.task)
            self.train_loader = DataLoader(self.train_data,
                                           batch_size=config.batch_size,
                                           num_workers=1,
                                           shuffle=True)

            self.test_data = bAbIDataset(config.dataset_dir,
                                         config.task,
                                         train=False)
            self.test_loader = DataLoader(self.test_data,
                                          batch_size=config.batch_size,
                                          num_workers=1,
                                          shuffle=False)
        elif 'CBTest' in config.dataset_dir:
            self.train_data = CBTestDataset(config.dataset_dir,
                                            config.word_type,
                                            perc_dict=config.perc_dict)
            print("Training set size: ", self.train_data.__len__())
            self.train_loader = DataLoader(self.train_data,
                                           batch_size=config.batch_size,
                                           num_workers=1,
                                           shuffle=True)

            self.test_data = copy.deepcopy(self.train_data)
            self.test_data.set_train_test(train=False)
            print("Testing set size: ", self.test_data.__len__())
            self.test_loader = DataLoader(self.test_data,
                                          batch_size=config.batch_size,
                                          num_workers=1,
                                          shuffle=False)

        settings = {
            "use_cuda": config.cuda,
            "num_vocab": self.train_data.num_vocab,
            "embedding_dim": 20,
            "sentence_size": self.train_data.sentence_size,
            "max_hops": config.max_hops
        }

        print("Longest sentence length", self.train_data.sentence_size)
        print("Longest story length", self.train_data.max_story_size)
        print("Average story length", self.train_data.mean_story_size)
        print("Number of vocab", self.train_data.num_vocab)

        self.mem_n2n = MemN2N(settings)
        self.ce_fn = nn.CrossEntropyLoss(reduction='sum')
        self.opt = torch.optim.SGD(self.mem_n2n.parameters(), lr=config.lr)
        print(self.mem_n2n)

        if config.cuda:
            self.ce_fn = self.ce_fn.cuda()
            self.mem_n2n = self.mem_n2n.cuda()

        self.start_epoch = 0
        self.config = config
Example #16
def main(_):
    count = []
    with open('./processed/word2idx.pkl', 'rb') as f:
        word2idx = pickle.load(f)

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    FLAGS.nwords = len(word2idx)
    pp.pprint(flags.FLAGS.__flags)

    #     train_data = read_data('%s/%s.train.txt' % (FLAGS.data_dir, FLAGS.data_name), count, word2idx)
    #     valid_data = read_data('%s/%s.valid.txt' % (FLAGS.data_dir, FLAGS.data_name), count, word2idx)
    #     test_data = read_data('%s/%s.test.txt' % (FLAGS.data_dir, FLAGS.data_name), count, word2idx)
    #     exit()
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.3 if FLAGS.inference else 0.6
    with tf.Session(config=config) as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()

        test_set_data = read_test_data(FLAGS.infer_set, word2idx)
        if FLAGS.inference:
            model.load()
            answer = model.inference(test_set_data, word2idx)
            import pandas as pd
            answer = pd.DataFrame(answer, columns=['answer'])
            answer.index += 1
            answer.to_csv('./guess/guess.csv', index_label='id')
        else:
            if FLAGS.restore:
                model.load()
            with open('./processed/all_train.pkl', 'rb') as f:
                train_data = pickle.load(f)
            with open('./processed/all_valid.pkl', 'rb') as f:
                valid_data = pickle.load(f)
            test_data = read_our_data(
                './data/CBData/cbtest_CN_test_2500ex.txt', count, word2idx)

            if FLAGS.is_test:
                print('Do not use --is_test True')
                exit()
                model.run(valid_data, test_data, word2idx, test_set_data)
            else:
                model.run(train_data, valid_data, word2idx, test_set_data)
Example #17
def main(_):
    word2idx = {}
    max_words = 0
    max_sentences = 0

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)

    #train_stories, train_questions, max_words, max_sentences = read_data('{}/train.txt'.format(FLAGS.data_dir), word2idx, max_words, max_sentences)
    #valid_stories, valid_questions, max_words, max_sentences = read_data('{}/train.txt'.format(FLAGS.data_dir), word2idx, max_words, max_sentences)

    train_stories, train_questions, max_words, max_sentences = read_data(
        '{}/qa{}_single-supporting-fact_train.txt'.format(
            FLAGS.data_dir, FLAGS.babi_task), word2idx, max_words,
        max_sentences)
    valid_stories, valid_questions, max_words, max_sentences = read_data(
        '{}/qa{}_single-supporting-fact_test.txt'.format(
            FLAGS.data_dir, FLAGS.babi_task), word2idx, max_words,
        max_sentences)
    test_stories, test_questions, max_words, max_sentences = read_data(
        '{}/qa{}_single-supporting-fact_test.txt'.format(
            FLAGS.data_dir, FLAGS.babi_task), word2idx, max_words,
        max_sentences)

    pad_data(train_stories, train_questions, max_words, max_sentences)
    pad_data(valid_stories, valid_questions, max_words, max_sentences)
    pad_data(test_stories, test_questions, max_words, max_sentences)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    FLAGS.nwords = len(word2idx)
    FLAGS.max_words = max_words
    FLAGS.max_sentences = max_sentences

    pp.pprint(flags.FLAGS.__flags)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()

        if FLAGS.is_test:
            model.run(valid_stories, valid_questions, test_stories,
                      test_questions)
        else:
            model.run(train_stories, train_questions, valid_stories,
                      valid_questions)
Example #18
def run(context, question):
    word2idx = {}
    idx2word = {}

    idx2word = load_obj('%s/idx2word.pkl' % (Config.vector_dir), idx2word)
    word2idx = load_obj('%s/word2idx.pkl' % (Config.vector_dir), word2idx)
    context_data = read_txt(context, word2idx)
    question_data = read_txt(question, word2idx)
    Config.nwords = len(word2idx)

    tf.reset_default_graph()
    with tf.Session() as sess:
        model = MemN2N(Config, sess, False)
        model.build_model()
        results = model.predict(context_data, question_data)
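        # Map each predicted index back to its word via idx2word and print the answer.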
        for result in results:
            print(' '.join(
                list(map(lambda x: idx2word.get(np.argmax(x)), result[0]))))
Example #19
File: main.py Project: Remeus/SeqMod
def main(_):
    count = [] # List of (word, count) for all the data
    word2idx = {} # Dict (word, ID) for all the data

    if not os.path.exists(FLAGS.checkpoint_dir):
      os.makedirs(FLAGS.checkpoint_dir)

    # Lists of word IDs
    if FLAGS.preloaded_data:
        with open('preloaded_telenor/train.pickle', 'rb') as f:
            train_data = pickle.load(f)
        with open('preloaded_telenor/val.pickle', 'rb') as f:
            valid_data = pickle.load(f)
            word2idx = pickle.load(f)
    else:
        train_data = read_data('%s/train.pickle' % FLAGS.data_dir, count, word2idx)
        valid_data = read_data('%s/val.pickle' % FLAGS.data_dir, count, word2idx)
        if FLAGS.is_test:
            test_data = read_data('%s/test.pickle' % FLAGS.data_dir, count, word2idx)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    FLAGS.nwords = len(word2idx)

    pp.pprint(flags.FLAGS.__flags)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:

        # Build the Memory Network
        model = MemN2N(FLAGS, sess)
        model.build_model()

        if len(FLAGS.infere) > 0:
            print('Make sure the training and validation data supplied are the same as during the training of the model (idx2word)')
            question = convert_question(FLAGS.infere, word2idx)
            model.infere(question, idx2word) # Prediction
        elif FLAGS.is_test:
            model.run(valid_data, test_data, idx2word) # Testing
        else:
            model.run(train_data, valid_data, idx2word) # Training
Example #20
def main(_):
    count = []
    word2idx = {}

    train_data = read_data(
        '%s/%s.train.txt' % (FLAGS.data_dir, FLAGS.data_name), count, word2idx)
    valid_data = read_data(
        '%s/%s.valid.txt' % (FLAGS.data_dir, FLAGS.data_name), count, word2idx)
    test_data = read_data('%s/%s.test.txt' % (FLAGS.data_dir, FLAGS.data_name),
                          count, word2idx)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    FLAGS.nwords = len(word2idx)

    pp.pprint(tf.app.flags.FLAGS.__flags)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        model.run(train_data, valid_data)
Example #21
    def __init__(self, config):
        self.train_data = bAbIDataset(config.dataset_dir, config.task)
        self.train_loader = DataLoader(self.train_data,
                                       batch_size=config.batch_size,
                                       num_workers=1,
                                       shuffle=True)

        self.test_data = bAbIDataset(config.dataset_dir,
                                     config.task,
                                     train=False)
        self.test_loader = DataLoader(self.test_data,
                                      batch_size=config.batch_size,
                                      num_workers=1,
                                      shuffle=False)

        settings = {
            "use_cuda": config.cuda,
            "num_vocab": self.train_data.num_vocab,
            "embedding_dim": 20,
            "sentence_size": self.train_data.sentence_size,
            "max_hops": config.max_hops
        }

        print("Longest sentence length", self.train_data.sentence_size)
        print("Longest story length", self.train_data.max_story_size)
        print("Average story length", self.train_data.mean_story_size)
        print("Number of vocab", self.train_data.num_vocab)

        self.mem_n2n = MemN2N(settings)
        self.ce_fn = nn.CrossEntropyLoss(reduction='sum')
        self.opt = torch.optim.SGD(self.mem_n2n.parameters(),
                                   lr=config.lr,
                                   weight_decay=1e-5)
        print(self.mem_n2n)

        if config.cuda:
            self.ce_fn = self.ce_fn.cuda()
            self.mem_n2n = self.mem_n2n.cuda()

        self.start_epoch = 0
        self.config = config
Example #22
    args.task, len(train_data), len(test_data)))

settings = {
    "device": device,
    "num_vocab": train_data.num_vocab,
    "embedding_dim": args.embedding_dim,
    "sentence_size": train_data.sentence_size,
    "max_hops": args.max_hops
}
print("Longest sentence length", train_data.sentence_size)
print("Longest story length", train_data.max_story_size)
print("Average story length", train_data.mean_story_size)
print("Number of vocab", train_data.num_vocab)

torch.manual_seed(args.random_state)
mem_n2n = MemN2N(settings)
criterion = nn.CrossEntropyLoss(reduction='sum')
opt = torch.optim.SGD(mem_n2n.parameters(), lr=args.lr)
print(mem_n2n)

mem_n2n = mem_n2n.to(device)

for epoch in range(1, args.epochs + 1):
    # train single epoch
    total_loss = 0.
    correct = 0
    for step, (story, query, answer) in enumerate(train_loader):
        story, query, answer = story.to(device), query.to(device), answer.to(
            device)
        logits = mem_n2n(story, query)
        preds = logits.argmax(dim=1)
Example #23
            break

        if lr < 1e-5:
            break

    save_dir = os.path.join(config.checkpoint_dir, "model")
    paddle.save(model.state_dict(), save_dir)


if __name__ == '__main__':
    config = Config('config.yaml')

    if not os.path.exists(config.checkpoint_dir):
        os.makedirs(config.checkpoint_dir)

    word2idx, train_data, valid_data, test_data = load_data(config)
    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    config.nwords = len(word2idx)
    print("vacab size is %d" % config.nwords)

    np.random.seed(config.srand)
    random.seed(config.srand)
    paddle.seed(config.srand)

    model = MemN2N(config)
    if config.recover_train:
        model_path = os.path.join(config.checkpoint_dir, config.model_name)
        state_dict = paddle.load(model_path)
        model.set_dict(state_dict)
    train(model, train_data, valid_data, config)
Example #24
def main(_):
    word2idx = {}
    cand2idx = {}
    max_words = 0
    max_sentences = 0

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)

    train_stories, train_questions, max_words, max_sentences = read_data(
        'dstc/out_task4_train85.json',
        word2idx,
        cand2idx,
        max_words,
        max_sentences,
        test_flag=False)
    valid_stories, valid_questions, max_words, max_sentences = read_data(
        'dstc/out_task4_valid15.json',
        word2idx,
        cand2idx,
        max_words,
        max_sentences,
        test_flag=False)
    test_stories, test_questions, max_words, max_sentences = read_data(
        'dstc/out_dialog-task4INFOS-kb2_atmosphere_restrictions-distr0.5-tst1000.json',
        word2idx,
        cand2idx,
        max_words,
        max_sentences,
        test_flag=True)

    pad_data(train_stories,
             train_questions,
             max_words,
             max_sentences,
             test_flag=False)
    pad_data(valid_stories,
             valid_questions,
             max_words,
             max_sentences,
             test_flag=False)
    pad_data(test_stories,
             test_questions,
             max_words,
             max_sentences,
             test_flag=True)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    idx2cand = dict(zip(cand2idx.values(), cand2idx.keys()))
    FLAGS.nwords = len(word2idx)
    FLAGS.ncands = len(cand2idx)
    FLAGS.max_words = max_words
    FLAGS.max_sentences = max_sentences

    pp.pprint(flags.FLAGS.__flags)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()

        if FLAGS.is_test:
            model.run(valid_stories,
                      valid_questions,
                      test_stories,
                      test_questions,
                      idx2cand,
                      answer_flag=False)
        else:
            model.run(train_stories,
                      train_questions,
                      valid_stories,
                      valid_questions,
                      idx2cand,
                      answer_flag=True)

        prediction_test_valid = 0

        if prediction_test_valid == 1:
            predictions, target = model.predict(valid_stories, valid_questions)

            correct_num = 0
            #print(len(valid_questions))
            for i in range(len(valid_questions)):
                index = i
                #depad_data(valid_stories, valid_questions)

                #question = valid_questions[index]['question']
                answer = valid_questions[index]['answer']['utterance']
                cand = valid_questions[index]['cand']
                #story_index = valid_questions[index]['story_index']
                #sentence_index = valid_questions[index]['sentence_index']

                #story = valid_stories[story_index][:sentence_index + 1]

                #story = [list(map(idx2word.get, sentence)) for sentence in story]
                #question = list(map(idx2word.get, question))
                #prediction = idx2cand[np.argmax(predictions[index])]
                pred_sorted = np.argsort(predictions[index][-FLAGS.ncands:])
                pred_sorted = pred_sorted[::-1]
                cand_list = []
                prediction = None
                for c in cand:
                    cand_list.append(idx2cand.get(c['utterance']))
                for pred in pred_sorted:
                    if idx2cand[pred] in cand_list:
                        prediction = idx2cand[pred]
                        break
                answer = idx2cand.get(answer)

                #print('Story:')
                #pp.pprint(story)
                #print('\nQuestion:')
                #pp.pprint(question)
                #print('\nPrediction:')
                #pp.pprint(prediction)
                #print('\nAnswer:')
                #pp.pprint(answer)
                #print('\ncandidates')
                #pp.pprint(cand_list)
                #print('\nCorrect:')
                #pp.pprint(prediction == answer)
                if prediction == answer:
                    correct_num += 1
            print('case: ' + str(len(valid_questions)) + '  correct_num: ' +
                  str(correct_num))
            print('acc - ' + str(correct_num / len(valid_questions) * 100))
        else:
            predictions, target = model.predict(test_stories, test_questions)

            correct_num = 0
            #print(len(valid_questions))
            responses = []
            for i in range(len(test_questions)):
                index = i
                dict_answer_current = {}
                dict_answer_current['dialog_id'] = test_questions[index][
                    'dialog_id']
                candidate_rank = []
                #depad_data(valid_stories, valid_questions)

                #question = valid_questions[index]['question']
                #answer = test_questions[index]['answer']['utterance']
                cand = test_questions[index]['cand']
                #story_index = valid_questions[index]['story_index']
                #sentence_index = valid_questions[index]['sentence_index']

                #story = valid_stories[story_index][:sentence_index + 1]

                #story = [list(map(idx2word.get, sentence)) for sentence in story]
                #question = list(map(idx2word.get, question))
                #prediction = idx2cand[np.argmax(predictions[index])]
                pred_sorted = np.argsort(predictions[index][-FLAGS.ncands:])
                pred_sorted = pred_sorted[::-1]
                cand_list = []
                prediction = None
                for c in cand:
                    cand_list.append(idx2cand.get(c['utterance']))
                crank = 1
                flag = 0
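                # Walk the predictions in descending score order: the first one
                # found in the candidate list becomes the prediction, and up to
                # ten matching candidates receive ranks.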
                for pred in pred_sorted:
                    if idx2cand[pred] in cand_list:
                        if flag == 0:
                            prediction = idx2cand[pred]
                            flag = 1
                        for c in cand:
                            if c['utterance'] == pred:
                                #print(idx2cand.get(c['utterance']))
                                candidate_rank.append({
                                    "candidate_id":
                                    c['candidate_id'],
                                    "rank":
                                    crank
                                })
                                crank = crank + 1
                                if crank == 11:
                                    break
                        if crank == 11:
                            break
                dict_answer_current['lst_candidate_id'] = candidate_rank
                responses.append(dict_answer_current)
                #answer = idx2cand.get(answer)

                #print('Story:')
                #pp.pprint(story)
                #print('\nQuestion:')
                #pp.pprint(question)
                #print('\nPrediction:')
                #pp.pprint(prediction)
                #print('\nAnswer:')
                #pp.pprint(answer)
                #print('\ncandidates')
                #pp.pprint(cand_list)
                #print('\nCorrect:')
                #pp.pprint(prediction == answer)
                #if prediction == answer:
                #    correct_num += 1
            fdout = open(
                "dialog-task4INFOS-kb2_atmosphere_restrictions-distr0.5-tst1000.answer.json",
                "w")
            json.dump(responses, fdout)
            fdout.close()
Example #25
    parser.add_argument('model_dir', type=str, help='trained model path')
    parser.add_argument('test_path', type=str, help='test data path')

    parser.add_argument('--maxmemsize',
                        type=int,
                        metavar='N',
                        default=100,
                        help='memory capacity')

    args = parser.parse_args()

    # loading vocabularies and the trained model

    dialog_vocab = Vocab.load(os.path.join(args.model_dir, 'dialog_vocab'))
    candidates_vocab = Vocab.load(
        os.path.join(args.model_dir, 'candidates_vocab'))

    model = MemN2N.load(os.path.join(args.model_dir, 'model'))

    test_data_reader_per_resp = DialogReader(args.test_path, dialog_vocab,
                                             candidates_vocab, args.maxmemsize,
                                             1, False, False, False)
    test_data_reader_per_dial = DialogReader(args.test_path, dialog_vocab,
                                             candidates_vocab, args.maxmemsize,
                                             1, False, False, True)

    print("Per Response Accuracy: ",
          calc_accuracy_per_response(model, test_data_reader_per_resp, False))
    print("Per Dialog Accuracy: ",
          calc_accuracy_per_dialog(model, test_data_reader_per_dial))
Example #26
    gr_train.add_argument('--shuffle', action="store_true", default=True, help='shuffle batches before every epoch')
    gr_train.add_argument('--save_dir', type=str, default=None, help='path to save the model')

    args = parser.parse_args()

    # build data, initialize model and start training.

    dialog_vocab, candidates_vocab = build_dialog_vocab(args.train_path, args.candidates_path, 1000)    

    trn_data_reader = DialogReader(args.train_path, dialog_vocab, candidates_vocab, args.maxmemsize, args.batchsize, False, args.shuffle, False)
    dev_data_reader = DialogReader(args.dev_path, dialog_vocab, candidates_vocab, args.maxmemsize, args.batchsize, False, False, False)

    candidate_vecs = Variable(trn_data_reader._candidate_vecs)
    candidate_vecs = candidate_vecs.cuda() if args.gpu else candidate_vecs

    model = MemN2N(args.edim, len(trn_data_reader._dialog_vocab), candidate_vecs, args.nhops, args.init_std)

    if args.gpu:
        model.cuda()

    train(model, trn_data_reader, dev_data_reader, args.epochs, args.lr, args.decay_factor, args.decay_every, args.maxgradnorm, 50, 500, args.gpu)
    
    # saving trained model and vocabularies.

    save_dir = args.save_dir
    if not save_dir:
        save_dir = os.getcwd()

    save_dir = os.path.join(save_dir, 'model_' + str(time.time()))

    if not os.path.exists(save_dir):
Example #27
train_labels = np.argmax(trainA, axis=1)
test_labels = np.argmax(testA, axis=1)
val_labels = np.argmax(valA, axis=1)

tf.set_random_seed(FLAGS.random_state)
batch_size = FLAGS.batch_size

batches = zip(range(0, n_train - batch_size, batch_size),
              range(batch_size, n_train, batch_size))
batches = [(start, end) for start, end in batches]

with tf.Session() as sess:
    model = MemN2N(batch_size,
                   vocab_size,
                   sentence_size,
                   memory_size,
                   FLAGS.embedding_size,
                   session=sess,
                   hops=FLAGS.hops,
                   max_grad_norm=FLAGS.max_grad_norm)

    for i in range(1, FLAGS.epochs + 1):
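        # Staircase annealing: the learning rate is halved every anneal_rate
        # epochs until anneal_stop_epoch, after which it stays constant.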
        if i - 1 <= FLAGS.anneal_stop_epoch:
            anneal = 2.0**((i - 1) // FLAGS.anneal_rate)
        else:
            anneal = 2.0**(FLAGS.anneal_stop_epoch // FLAGS.anneal_rate)
        lr = FLAGS.learning_rate / anneal

        np.random.shuffle(batches)
        total_cost = 0.0

        for start, end in batches:
Example #28
def main(_):
    word2idx = {}
    max_words = 0
    max_sentences = 0

    checkpoint_dir = "./checkpoints"
    data_dir = "./bAbI/en-valid"
    babi_task = 1
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    #train_stories, train_questions, max_words, max_sentences = read_all_data('{}/qa{}_train.txt'.format(data_dir, babi_task), word2idx, max_words, max_sentences)
    #valid_stories, valid_questions, max_words, max_sentences = read_all_data('{}/qa{}_valid.txt'.format(data_dir, babi_task), word2idx, max_words, max_sentences)
    #test_stories, test_questions, max_words, max_sentences = read_all_data('{}/qa{}_test.txt'.format(data_dir, babi_task), word2idx, max_words, max_sentences)
    train_stories, train_questions, max_words, max_sentences = read_all_data(
        'train', word2idx, max_words, max_sentences)
    valid_stories, valid_questions, max_words, max_sentences = read_all_data(
        'valid', word2idx, max_words, max_sentences)
    test_stories, test_questions, max_words, max_sentences = read_all_data(
        'test', word2idx, max_words, max_sentences)

    pad_data(train_stories, train_questions, max_words, max_sentences)
    pad_data(valid_stories, valid_questions, max_words, max_sentences)
    pad_data(test_stories, test_questions, max_words, max_sentences)

    idx2word = dict(zip(word2idx.values(), word2idx.keys()))
    #FLAGS.nwords = len(word2idx)
    #FLAGS.max_words = max_words
    #FLAGS.max_sentences = max_sentences

    #pp.pprint(flags.FLAGS.__flags)
    print(word2idx)
    is_test = True
    with tf.Session() as sess:
        model = MemN2N(is_test, len(word2idx), max_words, max_sentences, sess)
        model.build_model()

        if is_test:
            model.run(valid_stories, valid_questions, test_stories,
                      test_questions)
        else:
            model.run(train_stories, train_questions, valid_stories,
                      valid_questions)

        #predictions, target = model.predict(test_stories, test_questions)
        #cnt = 0
        #for i in range(len(target)):
        #print(idx2word[np.argmax(predictions[i])],idx2word[np.argmax(target[i])])
        #if np.argmax(predictions[i])==np.argmax(target[i]):
        #    cnt+=1
        #print("Test set accuracy ",cnt/len(target))
        print(word2idx)
        idx2word = dict(zip(word2idx.values(), word2idx.keys()))
        stry = input("Enter the story: ")
        flag = 'y'
        while flag == 'y':
            que = input("Enter the quest: ")
            print(stry, type(stry), que, type(que))
            story, quest = read_data_story(stry.lower(), que.lower(), word2idx,
                                           max_sentences, max_words)
            pad_data(story, quest, max_words, max_sentences)
            #print(story,quest,word2idx)
            prediction, target1 = model.predict(story, quest)
            print(idx2word[np.argmax(prediction[0])])
            flag = input('You want to continue: y or n ')
Example #29
def main(_):
    source_count, target_count = [], []
    source_word2idx, target_word2idx, word_set = {}, {}, {}
    max_sent_len = -1

    max_sent_len = get_dataset_resources(FLAGS.train_data, source_word2idx,
                                         target_word2idx, word_set,
                                         max_sent_len)
    max_sent_len = get_dataset_resources(FLAGS.test_data, source_word2idx,
                                         target_word2idx, word_set,
                                         max_sent_len)
    max_sent_len_predict = get_dataset_resources_test(FLAGS.predict_data,
                                                      source_word2idx,
                                                      target_word2idx,
                                                      word_set, max_sent_len)
    embeddings = load_embedding_file(FLAGS.pretrain_file, word_set)

    # test_data = get_dataset(FLAGS.test_data, source_word2idx, target_word2idx, embeddings, MODE='test')

    print("max sentence length - " + str(max_sent_len))
    FLAGS.pad_idx = source_word2idx['<pad>']
    FLAGS.nwords = len(source_word2idx)
    FLAGS.mem_size = max_sent_len

    pp.pprint(flags.FLAGS.__flags)

    print('loading pre-trained word vectors...')
    print('loading pre-trained word vectors for train and test data')

    FLAGS.pre_trained_context_wt, FLAGS.pre_trained_target_wt = get_embedding_matrix(
        embeddings, source_word2idx, target_word2idx, FLAGS.edim)

    N_FOLDS = 2
    skf = StratifiedKFold(N_FOLDS, shuffle=True, random_state=1000)
    train_data = get_dataset(FLAGS.train_data,
                             source_word2idx,
                             target_word2idx,
                             embeddings,
                             MODE='train')
    predict_data, raw_predict_data = get_dataset_test(FLAGS.predict_data,
                                                      source_word2idx,
                                                      target_word2idx,
                                                      embeddings)
    # source_data_predict, source_loc_data_predict, target_data_predict = predict_data
    source_data, source_loc_data, target_data, target_label = train_data

    X = np.column_stack((source_data, source_loc_data, target_data))
    y = np.array(target_label)

    # Use this for SKF validation
    # for j, (train_idx, test_idx) in enumerate(skf.split(X, y)):
    #     X_train, y_train = X[train_idx], y[train_idx]
    #     X_test, y_test = X[test_idx], y[test_idx]

    #     train_data_inner = (X_train[:,0], X_train[:,1], X_train[:,2], y_train)
    #     test_data_inner = (X_test[:,0], X_test[:,1], X_test[:,2], y_test)

    #     tf.reset_default_graph()
    #     with tf.Session() as sess:
    #         model = MemN2N(FLAGS, sess)
    #         model.build_model()
    #  saver = tf.train.Saver()
    #         model.run(train_data_inner, test_data_inner)
    #         saver.save(sess, './memnet', global_step=1000)
    # # for i in 10, do <-. Before model =...use tf.reset_default_graph

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    train_data_inner = (X_train[:, 0], X_train[:, 1], X_train[:, 2], y_train)
    test_data_inner = (X_test[:, 0], X_test[:, 1], X_test[:, 2], y_test)
    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        # saver = tf.train.Saver()
        model.run(train_data_inner, test_data_inner, predict_data,
                  raw_predict_data)
Example #30
def main(_):
  source_count, target_count = [], []
  source_word2idx, target_word2idx, word_set = {}, {}, {}
  max_sent_len = -1
  
  max_sent_len = get_dataset_resources(FLAGS.train_data, source_word2idx, target_word2idx, word_set, max_sent_len)
  max_sent_len = get_dataset_resources(FLAGS.test_data, source_word2idx, target_word2idx, word_set, max_sent_len)
  
  # embeddings = load_embedding_file(FLAGS.pretrain_file, word_set)
  print "Embeddings Loaded"
 

  '''
  #uncomment for the first run
  #required for generating data in the pickle format


  train_data = get_dataset(FLAGS.train_data, source_word2idx, target_word2idx, embeddings)
  test_data = get_dataset(FLAGS.test_data, source_word2idx, target_word2idx, embeddings)
  
  pkl.dump(train_data, open('train_data_restaurant.pkl', 'w'))
  pkl.dump(test_data, open('test_data_restaurant.pkl', 'w'))

  # pkl.dump(train_data, open('train_data_laptop.pkl', 'w'))
  # pkl.dump(test_data, open('test_data_laptop.pkl', 'w'))
  
  print "Dump Success!!!"
  
  return
  '''

  #Loading the previously generated data

  train_data = pkl.load(open('train_data_laptop.pkl', 'rb'))
  # train_data = pkl.load(open('train_data_extra.pkl', 'r'))
  # train_data = pkl.load(open('train_data_restaurant_clean.pkl', 'r'))

  test_data = pkl.load(open('test_data_laptop.pkl', 'rb'))
  # test_data = pkl.load(open('test_data_extra.pkl', 'r'))
  # test_data = pkl.load(open('test_data_restaurant_clean.pkl', 'r'))
  print "Dump Loaded!!!"

  #Uncomment for Rul + Con
  #Concatenates the Wma from the consTree to the Wrm for the (Rul + Con) method;
  #requires that the data in both are already matched index-wise

  GraphMemNetData = pkl.load(open('TOTAL_LAT_const_laptop.pkl', 'rb'))
  # GraphMemNetData = pkl.load(open('TOTAL_data_restaurant_clean.pkl','r'))

  Wma_train = GraphMemNetData[0][6]
  Wma_test = GraphMemNetData[1][6]
  
  Wrm = train_data[5]
  for index, wma in enumerate(Wma_train):
    wam = np.reshape(wma,(1,-1))
    Wrm[index] = np.concatenate((Wrm[index], wam),axis=0)
  
  Wrm = test_data[5]
  for index, wma in enumerate(Wma_test):
    wam = np.reshape(wma,(1,-1))
    Wrm[index] = np.concatenate((Wrm[index], wam),axis=0)
  

  print "train data size - ", len(train_data[0])
  print "test data size - ", len(test_data[0])

  print "max sentence length - ",max_sent_len
  FLAGS.pad_idx = source_word2idx['<pad>']
  FLAGS.nwords = len(source_word2idx)
  FLAGS.mem_size = max_sent_len

  pp.pprint(flags.FLAGS.__flags)

  print('loading pre-trained word vectors...')
  print('loading pre-trained word vectors for train and test data')
  
  # pre_trained_context_wt, pre_trained_target_wt = get_embedding_matrix(embeddings, source_word2idx,  target_word2idx, FLAGS.edim)
  
  pre_trained_context_wt, pre_trained_target_wt = GraphMemNetData[2], GraphMemNetData[3] 
  
  with tf.Session() as sess:
    model = MemN2N(FLAGS, sess, pre_trained_context_wt, pre_trained_target_wt)
    model.build_model()
    model.run(train_data, test_data)